diff --git a/notebooks/multimodal/multimodal_dataframe.ipynb b/notebooks/multimodal/multimodal_dataframe.ipynb index a578910b65..89af576711 100644 --- a/notebooks/multimodal/multimodal_dataframe.ipynb +++ b/notebooks/multimodal/multimodal_dataframe.ipynb @@ -83,7 +83,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -92,7 +92,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 3, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -171,37 +171,7 @@ " return bbq.json_value(get_metadata(series), \"$.size\").astype(\"Int64\")\n", "\n", "def get_updated(series):\n", - " return bpd.to_datetime(bbq.json_value(get_metadata(series), \"$.updated\").astype(\"Int64\"), unit=\"us\", utc=True)\n", - "\n", - "def display_blob(series, n=3):\n", - " import IPython.display as ipy_display\n", - " import pandas as pd\n", - " import requests\n", - " \n", - " # Retrieve access URLs and content types\n", - " runtime_json = bbq.to_json_string(bbq.obj.get_access_url(series, mode=\"R\"))\n", - " read_url = bbq.json_value(runtime_json, \"$.access_urls.read_url\")\n", - " content_type = get_content_type(series)\n", - " \n", - " # Pull to pandas to display\n", - " pdf = bpd.DataFrame({\"read_url\": read_url, \"content_type\": content_type}).head(n).to_pandas()\n", - " \n", - " width = bigframes.options.display.blob_display_width\n", - " height = bigframes.options.display.blob_display_height\n", - " \n", - " for _, row in pdf.iterrows():\n", - " if pd.isna(row[\"read_url\"]):\n", - " ipy_display.display(\"\")\n", - " elif pd.isna(row[\"content_type\"]):\n", - " ipy_display.display(requests.get(row[\"read_url\"]).content)\n", - " elif row[\"content_type\"].casefold().startswith(\"image\"):\n", - " ipy_display.display(ipy_display.Image(url=row[\"read_url\"], width=width, height=height))\n", - " elif row[\"content_type\"].casefold().startswith(\"audio\"):\n", - " ipy_display.display(ipy_display.Audio(requests.get(row[\"read_url\"]).content))\n", - " elif row[\"content_type\"].casefold().startswith(\"video\"):\n", - " ipy_display.display(ipy_display.Video(row[\"read_url\"], width=width, height=height))\n", - " else:\n", - " ipy_display.display(requests.get(row[\"read_url\"]).content)" + " return bpd.to_datetime(bbq.json_value(get_metadata(series), \"$.updated\").astype(\"Int64\"), unit=\"us\", utc=True)" ] }, { @@ -229,13 +199,7 @@ "# Create blob columns from wildcard path.\n", "df_image = bpd.from_glob_path(\n", " \"gs://cloud-samples-data/bigquery/tutorials/cymbal-pets/images/*\", name=\"image\"\n", - ")\n", - "# Other ways are: from string uri column\n", - "# df = bpd.DataFrame({\"uri\": [\"gs:///\", \"gs:///\"]})\n", - "# df[\"blob_col\"] = df[\"uri\"].str.to_blob()\n", - "\n", - "# From an existing object table\n", - "# df = bpd.read_gbq_object_table(\"\", name=\"blob_col\")" + ")" ] }, { @@ -254,7 +218,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dtypes.py:987: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dtypes.py:990: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", "instead of using `db_dtypes` in the future when available in pandas\n", "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", @@ -289,23 +253,23 @@ " \n", " \n", " 0\n", - " \n", + " \n", " \n", " \n", " 1\n", - " \n", + " \n", " \n", " \n", " 2\n", - " \n", + " \n", " \n", " \n", " 3\n", - " \n", + " \n", " \n", " \n", " 4\n", - " \n", + " \n", " \n", " \n", "\n", @@ -314,11 +278,11 @@ ], "text/plain": [ " image\n", - "0 {\"access_urls\":{\"expiry_time\":\"2026-02-13T01:5...\n", - "1 {\"access_urls\":{\"expiry_time\":\"2026-02-13T01:5...\n", - "2 {\"access_urls\":{\"expiry_time\":\"2026-02-13T01:5...\n", - "3 {\"access_urls\":{\"expiry_time\":\"2026-02-13T01:5...\n", - "4 {\"access_urls\":{\"expiry_time\":\"2026-02-13T01:5...\n", + "0 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3...\n", + "1 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3...\n", + "2 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3...\n", + "3 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3...\n", + "4 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3...\n", "\n", "[5 rows x 1 columns]" ] @@ -363,7 +327,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dtypes.py:987: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dtypes.py:990: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", "instead of using `db_dtypes` in the future when available in pandas\n", "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", @@ -402,7 +366,7 @@ " \n", " \n", " 0\n", - " \n", + " \n", " alice\n", " image/png\n", " 1591240\n", @@ -410,7 +374,7 @@ " \n", " \n", " 1\n", - " \n", + " \n", " bob\n", " image/png\n", " 1182951\n", @@ -418,7 +382,7 @@ " \n", " \n", " 2\n", - " \n", + " \n", " bob\n", " image/png\n", " 1520884\n", @@ -426,7 +390,7 @@ " \n", " \n", " 3\n", - " \n", + " \n", " alice\n", " image/png\n", " 1235401\n", @@ -434,7 +398,7 @@ " \n", " \n", " 4\n", - " \n", + " \n", " bob\n", " image/png\n", " 1591923\n", @@ -447,11 +411,11 @@ ], "text/plain": [ " image author content_type \\\n", - "0 {\"access_urls\":{\"expiry_time\":\"2026-02-13T01:5... alice image/png \n", - "1 {\"access_urls\":{\"expiry_time\":\"2026-02-13T01:5... bob image/png \n", - "2 {\"access_urls\":{\"expiry_time\":\"2026-02-13T01:5... bob image/png \n", - "3 {\"access_urls\":{\"expiry_time\":\"2026-02-13T01:5... alice image/png \n", - "4 {\"access_urls\":{\"expiry_time\":\"2026-02-13T01:5... bob image/png \n", + "0 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... alice image/png \n", + "1 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... bob image/png \n", + "2 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... bob image/png \n", + "3 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... alice image/png \n", + "4 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... bob image/png \n", "\n", " size updated \n", "0 1591240 2025-03-20 17:45:04+00:00 \n", @@ -478,294 +442,48 @@ "df_image" ] }, - { - "cell_type": "markdown", - "metadata": { - "id": "NUd4Kog_QLRS" - }, - "source": [ - "Then you can filter the rows based on the structured data. And for different content types, you can display them respectively or together." - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 75 - }, - "id": "UGuAk9PNDRF3", - "outputId": "73feb33d-4a05-48fb-96e5-3c48c2a456f3" - }, - "outputs": [ - { - "data": { - "text/html": [ - "" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "# filter images and display, you can also display audio and video types\n", - "display_blob(df_image[df_image[\"author\"] == \"alice\"][\"image\"])" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "1IJuakwJTZey" - }, - "source": [ - "### 3. Conduct image transformations\n", - "BigFrames Multimodal DataFrame provides image(and other) transformation functions. Such as image_blur, image_resize and image_normalize. The output can be saved to GCS folders or to BQ as bytes." - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "VWsl5BBPJ6N7", - "outputId": "45d2356e-322b-4982-cfa7-42d034dc4344" - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:959: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", - "instead of using `db_dtypes` in the future when available in pandas\n", - "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", - " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", - "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/core/log_adapter.py:182: FunctionAxisOnePreviewWarning: Blob Functions use bigframes DataFrame Managed function with axis=1 senario, which is a preview feature.\n", - " return method(*args, **kwargs)\n", - "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:959: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", - "instead of using `db_dtypes` in the future when available in pandas\n", - "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", - " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", - "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/core/log_adapter.py:182: FunctionAxisOnePreviewWarning: Blob Functions use bigframes DataFrame Managed function with axis=1 senario, which is a preview feature.\n", - " return method(*args, **kwargs)\n", - "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:959: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", - "instead of using `db_dtypes` in the future when available in pandas\n", - "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", - " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", - "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/core/log_adapter.py:182: FunctionAxisOnePreviewWarning: Blob Functions use bigframes DataFrame Managed function with axis=1 senario, which is a preview feature.\n", - " return method(*args, **kwargs)\n" - ] - } - ], - "source": [ - "df_image[\"blurred\"] = df_image[\"image\"].blob.image_blur(\n", - " (20, 20), dst=f\"gs://{OUTPUT_BUCKET}/image_blur_transformed/\", engine=\"opencv\"\n", - ")\n", - "df_image[\"resized\"] = df_image[\"image\"].blob.image_resize(\n", - " (300, 200), dst=f\"gs://{OUTPUT_BUCKET}/image_resize_transformed/\", engine=\"opencv\"\n", - ")\n", - "df_image[\"normalized\"] = df_image[\"image\"].blob.image_normalize(\n", - " alpha=50.0,\n", - " beta=150.0,\n", - " norm_type=\"minmax\",\n", - " dst=f\"gs://{OUTPUT_BUCKET}/image_normalize_transformed/\",\n", - " engine=\"opencv\",\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "rWCAGC8w64vU", - "outputId": "d7d456f0-8b56-492c-fe1b-967e9664d813" - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:959: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", - "instead of using `db_dtypes` in the future when available in pandas\n", - "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", - " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", - "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/core/log_adapter.py:182: FunctionAxisOnePreviewWarning: Blob Functions use bigframes DataFrame Managed function with axis=1 senario, which is a preview feature.\n", - " return method(*args, **kwargs)\n" - ] - } - ], - "source": [ - "# You can also chain functions together\n", - "df_image[\"blur_resized\"] = df_image[\"blurred\"].blob.image_resize((300, 200), dst=f\"gs://{OUTPUT_BUCKET}/image_blur_resize_transformed/\", engine=\"opencv\")" - ] - }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Using `verbose` mode for detailed output\\n\n", - "\\n\n", - "All multimodal functions support a `verbose` parameter, which defaults to `False`.\\n\n", - "\\n\n", - "* When `verbose=False` (the default), the function will only return the main content of the result (e.g., the transformed image, the extracted text).\\n\n", - "* When `verbose=True`, the function returns a `STRUCT` containing two fields:\\n\n", - " * `content`: The main result of the operation.\\n\n", - " * `status`: An informational field. If the operation is successful, this will be empty. If an error occurs during the processing of a specific row, this field will contain the error message, allowing the overall job to complete without failing.\\n\n", - "\\n\n", - "Using `verbose=True` is highly recommended for debugging and for workflows where you need to handle potential failures on a row-by-row basis. Let's see it in action with the `image_blur` function." + "### 3. Conduct image transformations" ] }, { - "cell_type": "code", - "execution_count": 10, + "cell_type": "markdown", "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:959: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", - "instead of using `db_dtypes` in the future when available in pandas\n", - "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", - " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", - "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/core/log_adapter.py:182: FunctionAxisOnePreviewWarning: Blob Functions use bigframes DataFrame Managed function with axis=1 senario, which is a preview feature.\n", - " return method(*args, **kwargs)\n", - "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:959: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", - "instead of using `db_dtypes` in the future when available in pandas\n", - "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", - " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
blurred_verbose
0{'status': '', 'content': {'uri': 'gs://bigfra...
1{'status': '', 'content': {'uri': 'gs://bigfra...
2{'status': '', 'content': {'uri': 'gs://bigfra...
3{'status': '', 'content': {'uri': 'gs://bigfra...
4{'status': '', 'content': {'uri': 'gs://bigfra...
\n", - "

5 rows × 1 columns

\n", - "
[5 rows x 1 columns in total]" - ], - "text/plain": [ - " blurred_verbose\n", - "0 {'status': '', 'content': {'uri': 'gs://bigfra...\n", - "1 {'status': '', 'content': {'uri': 'gs://bigfra...\n", - "2 {'status': '', 'content': {'uri': 'gs://bigfra...\n", - "3 {'status': '', 'content': {'uri': 'gs://bigfra...\n", - "4 {'status': '', 'content': {'uri': 'gs://bigfra...\n", - "\n", - "[5 rows x 1 columns]" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], "source": [ - "df_image[\"blurred_verbose\"] = df_image[\"image\"].blob.image_blur(\n", - " (20, 20), dst=f\"gs://{OUTPUT_BUCKET}/image_blur_transformed_verbose/\", engine=\"opencv\", verbose=True\n", - ")\n", - "df_image[[\"blurred_verbose\"]]" + "This section demonstrates how to perform image transformations like blur, resize, and normalize using custom BigQuery Python UDFs and the `opencv-python` library." ] }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 8, "metadata": { "colab": { "base_uri": "https://localhost:8080/", - "height": 605 + "height": 487 }, - "id": "6NGK6GYSU44B", - "outputId": "859101c1-2ee4-4f9a-e250-e8947127420a" + "id": "HhCb8jRsLe9B", + "outputId": "03081cf9-3a22-42c9-b38f-649f592fdada" }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:959: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", - "instead of using `db_dtypes` in the future when available in pandas\n", - "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", - " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", - "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:959: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", - "instead of using `db_dtypes` in the future when available in pandas\n", - "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", - " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", - "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:959: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", - "instead of using `db_dtypes` in the future when available in pandas\n", - "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", - " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", - "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:959: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/pandas/__init__.py:151: PreviewWarning: udf is in preview.\n", + " return global_session.with_default_session(\n", + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dataframe.py:4655: FunctionAxisOnePreviewWarning: DataFrame.apply with parameter axis=1 scenario is in preview.\n", + " warnings.warn(msg, category=bfe.FunctionAxisOnePreviewWarning)\n", + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dtypes.py:990: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", "instead of using `db_dtypes` in the future when available in pandas\n", "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", - "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:959: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", - "instead of using `db_dtypes` in the future when available in pandas\n", - "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", - " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n" + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/core/logging/log_adapter.py:229: ApiDeprecationWarning: The blob accessor is deprecated and will be removed in a future release. Use bigframes.bigquery.obj functions instead.\n", + " return prop(*args, **kwargs)\n", + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/core/logging/log_adapter.py:229: ApiDeprecationWarning: The blob accessor is deprecated and will be removed in a future release. Use bigframes.bigquery.obj functions instead.\n", + " return prop(*args, **kwargs)\n" ] }, { @@ -790,148 +508,136 @@ " \n", " \n", " image\n", - " author\n", - " content_type\n", - " size\n", - " updated\n", " blurred\n", - " resized\n", - " normalized\n", - " blur_resized\n", - " blurred_verbose\n", " \n", " \n", " \n", " \n", " 0\n", - " \n", - " alice\n", - " image/png\n", - " 1591240\n", - " 2025-03-20 17:45:04+00:00\n", - " \n", - " \n", - " \n", - " \n", - " {'status': '', 'content': {'uri': 'gs://bigframes_blob_test/image_blur_transformed_verbose/k9-guard-dog-paw-balm.png', 'version': None, 'authorizer': 'bigframes-dev.us.bigframes-default-connection', 'details': None}}\n", + " \n", + " \n", " \n", " \n", " 1\n", - " \n", - " bob\n", - " image/png\n", - " 1182951\n", - " 2025-03-20 17:45:02+00:00\n", - " \n", - " \n", - " \n", - " \n", - " {'status': '', 'content': {'uri': 'gs://bigframes_blob_test/image_blur_transformed_verbose/k9-guard-dog-hot-spot-spray.png', 'version': None, 'authorizer': 'bigframes-dev.us.bigframes-default-connection', 'details': None}}\n", + " \n", + " \n", " \n", " \n", " 2\n", - " \n", - " bob\n", - " image/png\n", - " 1520884\n", - " 2025-03-20 17:44:55+00:00\n", - " \n", - " \n", - " \n", - " \n", - " {'status': '', 'content': {'uri': 'gs://bigframes_blob_test/image_blur_transformed_verbose/fluffy-buns-chinchilla-food-variety-pack.png', 'version': None, 'authorizer': 'bigframes-dev.us.bigframes-default-connection', 'details': None}}\n", + " \n", + " \n", " \n", " \n", " 3\n", - " \n", - " alice\n", - " image/png\n", - " 1235401\n", - " 2025-03-20 17:45:19+00:00\n", - " \n", - " \n", - " \n", - " \n", - " {'status': '', 'content': {'uri': 'gs://bigframes_blob_test/image_blur_transformed_verbose/purrfect-perch-cat-scratcher.png', 'version': None, 'authorizer': 'bigframes-dev.us.bigframes-default-connection', 'details': None}}\n", + " \n", + " \n", " \n", " \n", " 4\n", - " \n", - " bob\n", - " image/png\n", - " 1591923\n", - " 2025-03-20 17:44:47+00:00\n", - " \n", - " \n", - " \n", - " \n", - " {'status': '', 'content': {'uri': 'gs://bigframes_blob_test/image_blur_transformed_verbose/chirpy-seed-deluxe-bird-food.png', 'version': None, 'authorizer': 'bigframes-dev.us.bigframes-default-connection', 'details': None}}\n", + " \n", + " \n", " \n", " \n", "\n", - "

5 rows × 10 columns

\n", - "[5 rows x 10 columns in total]" + "

5 rows × 2 columns

\n", + "[5 rows x 2 columns in total]" ], "text/plain": [ - " image author content_type \\\n", - "0 {'uri': 'gs://cloud-samples-data/bigquery/tuto... alice image/png \n", - "1 {'uri': 'gs://cloud-samples-data/bigquery/tuto... bob image/png \n", - "2 {'uri': 'gs://cloud-samples-data/bigquery/tuto... bob image/png \n", - "3 {'uri': 'gs://cloud-samples-data/bigquery/tuto... alice image/png \n", - "4 {'uri': 'gs://cloud-samples-data/bigquery/tuto... bob image/png \n", - "\n", - " size updated \\\n", - "0 1591240 2025-03-20 17:45:04+00:00 \n", - "1 1182951 2025-03-20 17:45:02+00:00 \n", - "2 1520884 2025-03-20 17:44:55+00:00 \n", - "3 1235401 2025-03-20 17:45:19+00:00 \n", - "4 1591923 2025-03-20 17:44:47+00:00 \n", - "\n", - " blurred \\\n", - "0 {'uri': 'gs://bigframes_blob_test/image_blur_t... \n", - "1 {'uri': 'gs://bigframes_blob_test/image_blur_t... \n", - "2 {'uri': 'gs://bigframes_blob_test/image_blur_t... \n", - "3 {'uri': 'gs://bigframes_blob_test/image_blur_t... \n", - "4 {'uri': 'gs://bigframes_blob_test/image_blur_t... \n", - "\n", - " resized \\\n", - "0 {'uri': 'gs://bigframes_blob_test/image_resize... \n", - "1 {'uri': 'gs://bigframes_blob_test/image_resize... \n", - "2 {'uri': 'gs://bigframes_blob_test/image_resize... \n", - "3 {'uri': 'gs://bigframes_blob_test/image_resize... \n", - "4 {'uri': 'gs://bigframes_blob_test/image_resize... \n", - "\n", - " normalized \\\n", - "0 {'uri': 'gs://bigframes_blob_test/image_normal... \n", - "1 {'uri': 'gs://bigframes_blob_test/image_normal... \n", - "2 {'uri': 'gs://bigframes_blob_test/image_normal... \n", - "3 {'uri': 'gs://bigframes_blob_test/image_normal... \n", - "4 {'uri': 'gs://bigframes_blob_test/image_normal... \n", + " image \\\n", + "0 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... \n", + "1 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... \n", + "2 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... \n", + "3 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... \n", + "4 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... \n", "\n", - " blur_resized \\\n", - "0 {'uri': 'gs://bigframes_blob_test/image_blur_r... \n", - "1 {'uri': 'gs://bigframes_blob_test/image_blur_r... \n", - "2 {'uri': 'gs://bigframes_blob_test/image_blur_r... \n", - "3 {'uri': 'gs://bigframes_blob_test/image_blur_r... \n", - "4 {'uri': 'gs://bigframes_blob_test/image_blur_r... \n", + " blurred \n", + "0 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... \n", + "1 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... \n", + "2 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... \n", + "3 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... \n", + "4 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... \n", "\n", - " blurred_verbose \n", - "0 {'status': '', 'content': {'uri': 'gs://bigfra... \n", - "1 {'status': '', 'content': {'uri': 'gs://bigfra... \n", - "2 {'status': '', 'content': {'uri': 'gs://bigfra... \n", - "3 {'status': '', 'content': {'uri': 'gs://bigfra... \n", - "4 {'status': '', 'content': {'uri': 'gs://bigfra... \n", - "\n", - "[5 rows x 10 columns]" + "[5 rows x 2 columns]" ] }, - "execution_count": 11, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "df_image" + "# Construct the canonical connection ID\n", + "FULL_CONNECTION_ID = f\"{PROJECT}.{LOCATION}.bigframes-default-connection\"\n", + "\n", + "@bpd.udf(\n", + " input_types=[str, str, int, int],\n", + " output_type=str,\n", + " dataset=DATASET_ID,\n", + " name=\"image_blur\",\n", + " bigquery_connection=FULL_CONNECTION_ID,\n", + " packages=[\"opencv-python\", \"numpy\", \"requests\"],\n", + ")\n", + "def image_blur(src_rt: str, dst_rt: str, kx: int, ky: int) -> str:\n", + " import json\n", + " import cv2 as cv\n", + " import numpy as np\n", + " import requests\n", + " import base64\n", + "\n", + " src_obj = json.loads(src_rt)\n", + " src_url = src_obj[\"access_urls\"][\"read_url\"]\n", + " \n", + " response = requests.get(src_url, timeout=30)\n", + " response.raise_for_status()\n", + " \n", + " img = cv.imdecode(np.frombuffer(response.content, np.uint8), cv.IMREAD_UNCHANGED)\n", + " if img is None:\n", + " raise ValueError(\"cv.imdecode failed\")\n", + " \n", + " kx, ky = int(kx), int(ky)\n", + " img_blurred = cv.blur(img, ksize=(kx, ky))\n", + " \n", + " success, encoded = cv.imencode(\".jpeg\", img_blurred)\n", + " if not success:\n", + " raise ValueError(\"cv.imencode failed\")\n", + " \n", + " # Handle two output modes\n", + " if dst_rt: # GCS/Series output mode\n", + " dst_obj = json.loads(dst_rt)\n", + " dst_url = dst_obj[\"access_urls\"][\"write_url\"]\n", + " \n", + " requests.put(dst_url, data=encoded.tobytes(), headers={\"Content-Type\": \"image/jpeg\"}, timeout=30).raise_for_status()\n", + " \n", + " uri = dst_obj[\"objectref\"][\"uri\"]\n", + " return uri\n", + " \n", + " else: # BigQuery bytes output mode \n", + " image_bytes = encoded.tobytes()\n", + " return base64.b64encode(image_bytes).decode()\n", + "\n", + "def apply_transformation(series, dst_folder, udf, *args, verbose=False):\n", + " import os\n", + " dst_folder = os.path.join(dst_folder, \"\")\n", + " # Fetch metadata to get the URI\n", + " metadata = bbq.obj.fetch_metadata(series)\n", + " current_uri = metadata.struct.field(\"uri\")\n", + " dst_uri = current_uri.str.replace(r\"^.*\\/(.*)$\", rf\"{dst_folder}\\1\", regex=True)\n", + " dst_blob = dst_uri.str.to_blob(connection=FULL_CONNECTION_ID)\n", + " df_transform = bpd.DataFrame({\n", + " \"src_rt\": get_runtime_json_str(series, mode=\"R\"),\n", + " \"dst_rt\": get_runtime_json_str(dst_blob, mode=\"RW\"),\n", + " })\n", + " res = df_transform[[\"src_rt\", \"dst_rt\"]].apply(\n", + " udf, axis=1, args=args\n", + " )\n", + " return res if verbose else res.str.to_blob(connection=FULL_CONNECTION_ID)\n", + "\n", + "# Apply transformations\n", + "df_image[\"blurred\"] = apply_transformation(\n", + " df_image[\"image\"], f\"gs://{OUTPUT_BUCKET}/image_blur_transformed/\",\n", + " image_blur, 20, 20\n", + ")\n", + "df_image[[\"image\", \"blurred\"]]" ] }, { @@ -945,7 +651,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 9, "metadata": { "id": "mRUGfcaFVW-3" }, @@ -954,7 +660,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/core/log_adapter.py:182: FutureWarning: Since upgrading the default model can cause unintended breakages, the\n", + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/core/logging/log_adapter.py:183: FutureWarning: Since upgrading the default model can cause unintended breakages, the\n", "default model will be removed in BigFrames 3.0. Please supply an\n", "explicit model to avoid this message.\n", " return method(*args, **kwargs)\n" @@ -968,7 +674,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 10, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -982,22 +688,18 @@ "name": "stderr", "output_type": "stream", "text": [ - "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:959: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dtypes.py:990: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", "instead of using `db_dtypes` in the future when available in pandas\n", "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", - "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:959: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", - "instead of using `db_dtypes` in the future when available in pandas\n", - "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", - " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", - "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:959: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/core/logging/log_adapter.py:229: ApiDeprecationWarning: The blob accessor is deprecated and will be removed in a future release. Use bigframes.bigquery.obj functions instead.\n", + " return prop(*args, **kwargs)\n", + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dtypes.py:990: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", "instead of using `db_dtypes` in the future when available in pandas\n", "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", - "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:959: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", - "instead of using `db_dtypes` in the future when available in pandas\n", - "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", - " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n" + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/core/logging/log_adapter.py:229: ApiDeprecationWarning: The blob accessor is deprecated and will be removed in a future release. Use bigframes.bigquery.obj functions instead.\n", + " return prop(*args, **kwargs)\n" ] }, { @@ -1028,69 +730,84 @@ " \n", " \n", " 0\n", - " The item is a tin of K9 Guard dog paw balm.\n", - " \n", + " The item is a container of K9 Guard Dog Paw Balm.\n", + " \n", " \n", " \n", " 1\n", " The item is K9 Guard Dog Hot Spot Spray.\n", - " \n", + " \n", + " \n", + " \n", + " 2\n", + " The image contains three bags of food, likely for small animals like rabbits or guinea pigs. They are labeled \"Timoth Hay Lend Variety Plend\", \"Herbal Greeıs Mix Variety Blend\", and \"Berry & Blossom Treat Blend\", all under the brand \"Fluffy Buns.\" The bags are yellow, green, and purple, respectively. Each bag has a pile of its contents beneath it.\n", + " \n", + " \n", + " \n", + " 3\n", + " The item is a cat tree.\\n\n", + " \n", + " \n", + " \n", + " 4\n", + " The item is a bag of bird seed. Specifically, it's labeled \"Chirpy Seed\", \"Deluxe Bird Food\".\\n\n", + " \n", " \n", " \n", "\n", - "

2 rows × 2 columns

\n", - "[2 rows x 2 columns in total]" + "

5 rows × 2 columns

\n", + "[5 rows x 2 columns in total]" ], "text/plain": [ - " ml_generate_text_llm_result \\\n", - "0 The item is a tin of K9 Guard dog paw balm. \n", - "1 The item is K9 Guard Dog Hot Spot Spray. \n", + " ml_generate_text_llm_result \\\n", + "0 The item is a container of K9 Guard Dog Paw Balm. \n", + "1 The item is K9 Guard Dog Hot Spot Spray. \n", + "2 The image contains three bags of food, likely ... \n", + "3 The item is a cat tree.\\n \n", + "4 The item is a bag of bird seed. Specifically, ... \n", "\n", " image \n", - "0 {'uri': 'gs://cloud-samples-data/bigquery/tuto... \n", - "1 {'uri': 'gs://cloud-samples-data/bigquery/tuto... \n", + "0 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4... \n", + "1 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4... \n", + "2 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4... \n", + "3 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4... \n", + "4 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4... \n", "\n", - "[2 rows x 2 columns]" + "[5 rows x 2 columns]" ] }, - "execution_count": 13, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Ask the same question on the images\n", - "df_image = df_image.head(2)\n", "answer = gemini.predict(df_image, prompt=[\"what item is it?\", df_image[\"image\"]])\n", "answer[[\"ml_generate_text_llm_result\", \"image\"]]" ] }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 11, "metadata": { "id": "IG3J3HsKhyBY" }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:959: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", - "instead of using `db_dtypes` in the future when available in pandas\n", - "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", - " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n" - ] - } - ], + "outputs": [], "source": [ "# Ask different questions\n", - "df_image[\"question\"] = [\"what item is it?\", \"what color is the picture?\"]" + "df_image[\"question\"] = [\n", + " \"what item is it?\",\n", + " \"what color is the picture?\",\n", + " \"what is the product name?\",\n", + " \"is it for pets?\",\n", + " \"what is the weight of the product?\",\n", + "]" ] }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 12, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -1104,22 +821,18 @@ "name": "stderr", "output_type": "stream", "text": [ - "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:959: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", - "instead of using `db_dtypes` in the future when available in pandas\n", - "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", - " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", - "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:959: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dtypes.py:990: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", "instead of using `db_dtypes` in the future when available in pandas\n", "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", - "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:959: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/core/logging/log_adapter.py:229: ApiDeprecationWarning: The blob accessor is deprecated and will be removed in a future release. Use bigframes.bigquery.obj functions instead.\n", + " return prop(*args, **kwargs)\n", + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dtypes.py:990: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", "instead of using `db_dtypes` in the future when available in pandas\n", "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", - "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:959: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", - "instead of using `db_dtypes` in the future when available in pandas\n", - "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", - " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n" + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/core/logging/log_adapter.py:229: ApiDeprecationWarning: The blob accessor is deprecated and will be removed in a future release. Use bigframes.bigquery.obj functions instead.\n", + " return prop(*args, **kwargs)\n" ] }, { @@ -1150,32 +863,53 @@ " \n", " \n", " 0\n", - " The item is a tin of K9Guard Dog Paw Balm.\n", - " \n", + " The item is a container of Dog Paw Balm.\n", + " \n", " \n", " \n", " 1\n", - " The bottle is mostly white, with a light blue accents. The background is a light gray. There are also black and green elements on the bottle's label.\n", - " \n", + " The picture contains many colors, including white, black, green, and a bright blue. The product label predominantly features a bright blue hue. The background is a solid gray.\n", + " \n", + " \n", + " \n", + " 2\n", + " Here are the product names from the image:\\n\\n* **Timoth Hay Lend Variety Plend** is the product in the yellow bag.\\n* **Herbal Greeıs Mix Variety Blend** is the product in the green bag.\\n* **Berry & Blossom Treat Blend** is the product in the purple bag.\n", + " \n", + " \n", + " \n", + " 3\n", + " Yes, it is for pets. It appears to be a cat tree or scratching post.\\n\n", + " \n", + " \n", + " \n", + " 4\n", + " The image shows that the weight of the product is 15 oz/ 257g.\n", + " \n", " \n", " \n", "\n", - "

2 rows × 2 columns

\n", - "[2 rows x 2 columns in total]" + "

5 rows × 2 columns

\n", + "[5 rows x 2 columns in total]" ], "text/plain": [ " ml_generate_text_llm_result \\\n", - "0 The item is a tin of K9Guard Dog Paw Balm. \n", - "1 The bottle is mostly white, with a light blue ... \n", + "0 The item is a container of Dog Paw Balm. \n", + "1 The picture contains many colors, including wh... \n", + "2 Here are the product names from the image:\\n\\n... \n", + "3 Yes, it is for pets. It appears to be a cat tr... \n", + "4 The image shows that the weight of the product... \n", "\n", " image \n", - "0 {'uri': 'gs://cloud-samples-data/bigquery/tuto... \n", - "1 {'uri': 'gs://cloud-samples-data/bigquery/tuto... \n", + "0 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4... \n", + "1 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4... \n", + "2 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4... \n", + "3 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4... \n", + "4 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4... \n", "\n", - "[2 rows x 2 columns]" + "[5 rows x 2 columns]" ] }, - "execution_count": 15, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } @@ -1187,7 +921,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 13, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -1201,19 +935,17 @@ "name": "stderr", "output_type": "stream", "text": [ - "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/core/log_adapter.py:182: FutureWarning: Since upgrading the default model can cause unintended breakages, the\n", + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/core/logging/log_adapter.py:183: FutureWarning: Since upgrading the default model can cause unintended breakages, the\n", "default model will be removed in BigFrames 3.0. Please supply an\n", "explicit model to avoid this message.\n", " return method(*args, **kwargs)\n", - "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:959: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", - "instead of using `db_dtypes` in the future when available in pandas\n", - "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", - " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", - "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:959: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dtypes.py:990: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", "instead of using `db_dtypes` in the future when available in pandas\n", "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", - "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:959: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/core/logging/log_adapter.py:229: ApiDeprecationWarning: The blob accessor is deprecated and will be removed in a future release. Use bigframes.bigquery.obj functions instead.\n", + " return prop(*args, **kwargs)\n", + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dtypes.py:990: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", "instead of using `db_dtypes` in the future when available in pandas\n", "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n" @@ -1250,46 +982,82 @@ " \n", " \n", " 0\n", - " [ 0.00638842 0.01666344 0.00451782 ... -0.02...\n", + " [ 0.00638822 0.01666385 0.00451817 ... -0.02...\n", " \n", " <NA>\n", " <NA>\n", - " {\"access_urls\":{\"expiry_time\":\"2025-10-25T00:2...\n", + " {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4...\n", " \n", " \n", " 1\n", - " [ 0.00973689 0.02148374 0.00244311 ... 0.00...\n", + " [ 0.00973976 0.02148137 0.0024429 ... 0.00...\n", + " \n", + " <NA>\n", + " <NA>\n", + " {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4...\n", + " \n", + " \n", + " 2\n", + " [ 0.01195884 0.02139394 0.05968047 ... -0.01...\n", + " \n", + " <NA>\n", + " <NA>\n", + " {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4...\n", + " \n", + " \n", + " 3\n", + " [-0.02621161 0.02797648 0.04416926 ... -0.01...\n", + " \n", + " <NA>\n", + " <NA>\n", + " {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4...\n", + " \n", + " \n", + " 4\n", + " [ 0.05918628 0.0125137 0.01907336 ... 0.01...\n", " \n", " <NA>\n", " <NA>\n", - " {\"access_urls\":{\"expiry_time\":\"2025-10-25T00:2...\n", + " {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4...\n", " \n", " \n", "\n", - "

2 rows × 5 columns

\n", - "[2 rows x 5 columns in total]" + "

5 rows × 5 columns

\n", + "[5 rows x 5 columns in total]" ], "text/plain": [ " ml_generate_embedding_result \\\n", - "0 [ 0.00638842 0.01666344 0.00451782 ... -0.02... \n", - "1 [ 0.00973689 0.02148374 0.00244311 ... 0.00... \n", + "0 [ 0.00638822 0.01666385 0.00451817 ... -0.02... \n", + "1 [ 0.00973976 0.02148137 0.0024429 ... 0.00... \n", + "2 [ 0.01195884 0.02139394 0.05968047 ... -0.01... \n", + "3 [-0.02621161 0.02797648 0.04416926 ... -0.01... \n", + "4 [ 0.05918628 0.0125137 0.01907336 ... 0.01... \n", "\n", " ml_generate_embedding_status ml_generate_embedding_start_sec \\\n", "0 \n", "1 \n", + "2 \n", + "3 \n", + "4 \n", "\n", " ml_generate_embedding_end_sec \\\n", "0 \n", "1 \n", + "2 \n", + "3 \n", + "4 \n", "\n", " content \n", - "0 {\"access_urls\":{\"expiry_time\":\"2025-10-25T00:2... \n", - "1 {\"access_urls\":{\"expiry_time\":\"2025-10-25T00:2... \n", + "0 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4... \n", + "1 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4... \n", + "2 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4... \n", + "3 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4... \n", + "4 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4... \n", "\n", - "[2 rows x 5 columns]" + "[5 rows x 5 columns]" ] }, - "execution_count": 16, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" } @@ -1314,9 +1082,18 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 14, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/pandas/__init__.py:151: PreviewWarning: udf is in preview.\n", + " return global_session.with_default_session(\n" + ] + } + ], "source": [ "# Construct the canonical connection ID\n", "FULL_CONNECTION_ID = f\"{PROJECT}.{LOCATION}.bigframes-default-connection\"\n", @@ -1334,12 +1111,9 @@ " import json\n", " from pypdf import PdfReader\n", " import requests\n", - " from requests import adapters\n", - " session = requests.Session()\n", - " session.mount(\"https://\", adapters.HTTPAdapter(max_retries=3))\n", " src_obj_ref_rt_json = json.loads(src_obj_ref_rt)\n", " src_url = src_obj_ref_rt_json[\"access_urls\"][\"read_url\"]\n", - " response = session.get(src_url, timeout=30, stream=True)\n", + " response = requests.get(src_url, timeout=30, stream=True)\n", " response.raise_for_status()\n", " pdf_bytes = response.content\n", " pdf_file = io.BytesIO(pdf_bytes)\n", @@ -1364,12 +1138,9 @@ " import json\n", " from pypdf import PdfReader\n", " import requests\n", - " from requests import adapters\n", - " session = requests.Session()\n", - " session.mount(\"https://\", adapters.HTTPAdapter(max_retries=3))\n", " src_obj_ref_rt_json = json.loads(src_obj_ref_rt)\n", " src_url = src_obj_ref_rt_json[\"access_urls\"][\"read_url\"]\n", - " response = session.get(src_url, timeout=30, stream=True)\n", + " response = requests.get(src_url, timeout=30, stream=True)\n", " response.raise_for_status()\n", " pdf_bytes = response.content\n", " pdf_file = io.BytesIO(pdf_bytes)\n", @@ -1395,9 +1166,60 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 15, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
extracted_textchunked
0CritterCuisine Pro 5000 - Automatic Pet Feeder...[\"CritterCuisine Pro 5000 - Automatic Pet Feed...
\n", + "

1 rows × 2 columns

\n", + "
[1 rows x 2 columns in total]" + ], + "text/plain": [ + " extracted_text \\\n", + "0 CritterCuisine Pro 5000 - Automatic Pet Feeder... \n", + "\n", + " chunked \n", + "0 [\"CritterCuisine Pro 5000 - Automatic Pet Feed... \n", + "\n", + "[1 rows x 2 columns]" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "df_pdf = bpd.from_glob_path(\"gs://cloud-samples-data/bigquery/tutorials/cymbal-pets/documents/*\", name=\"pdf\")\n", "\n", @@ -1415,9 +1237,36 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 16, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
0    CritterCuisine Pro 5000 - Automatic Pet Feeder...\n",
+              "0    on a level, stable surface to prevent tipping....\n",
+              "0    included)\\nto maintain the schedule during pow...\n",
+              "0    digits for Meal 1 will flash.\\n\u0000. Use the UP/D...\n",
+              "0    paperclip) for 5\\nseconds. This will reset all...\n",
+              "0    unit with a damp cloth. Do not immerse the bas...\n",
+              "0    continues,\\ncontact customer support.\\nE2: Foo...
" + ], + "text/plain": [ + "0 CritterCuisine Pro 5000 - Automatic Pet Feeder...\n", + "0 on a level, stable surface to prevent tipping....\n", + "0 included)\\nto maintain the schedule during pow...\n", + "0 digits for Meal 1 will flash.\\n\u0000. Use the UP/D...\n", + "0 paperclip) for 5\\nseconds. This will reset all...\n", + "0 unit with a damp cloth. Do not immerse the bas...\n", + "0 continues,\\ncontact customer support.\\nE2: Foo...\n", + "Name: chunked, dtype: string" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Explode the chunks to see each chunk as a separate row\n", "chunked = df_pdf[\"chunked\"].explode()\n", @@ -1433,7 +1282,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 17, "metadata": {}, "outputs": [], "source": [ @@ -1443,25 +1292,39 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 18, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dtypes.py:987: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dtypes.py:990: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", "instead of using `db_dtypes` in the future when available in pandas\n", "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n" ] + }, + { + "data": { + "text/html": [ + "
0    Now, as all books, not primarily intended as p...
" + ], + "text/plain": [ + "0 Now, as all books, not primarily intended as p...\n", + "Name: transcribed_content, dtype: string" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ "# The audio_transcribe function is a convenience wrapper around bigframes.bigquery.ai.generate.\n", "# Here's how to perform the same operation directly:\n", "\n", - "audio_series = df['audio']\n", + "audio_series = df[\"audio\"]\n", "prompt_text = (\n", " \"**Task:** Transcribe the provided audio. **Instructions:** - Your response \"\n", " \"must contain only the verbatim transcription of the audio. - Do not include \"\n", @@ -1486,7 +1349,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 19, "metadata": {}, "outputs": [ { @@ -1499,7 +1362,7 @@ "Name: transcription_results, dtype: struct[pyarrow]" ] }, - "execution_count": 12, + "execution_count": 19, "metadata": {}, "output_type": "execute_result" } @@ -1536,9 +1399,18 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 20, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/pandas/__init__.py:151: PreviewWarning: udf is in preview.\n", + " return global_session.with_default_session(\n" + ] + } + ], "source": [ "# Construct the canonical connection ID\n", "FULL_CONNECTION_ID = f\"{PROJECT}.{LOCATION}.bigframes-default-connection\"\n", @@ -1559,12 +1431,9 @@ " import json\n", " from PIL import ExifTags, Image\n", " import requests\n", - " from requests import adapters\n", - " session = requests.Session()\n", - " session.mount(\"https://\", adapters.HTTPAdapter(max_retries=3))\n", " src_obj_ref_rt_json = json.loads(src_obj_ref_rt)\n", " src_url = src_obj_ref_rt_json[\"access_urls\"][\"read_url\"]\n", - " response = session.get(src_url, timeout=30)\n", + " response = requests.get(src_url, timeout=30)\n", " bts = response.content\n", " image = Image.open(io.BytesIO(bts))\n", " exif_data = image.getexif()\n", @@ -1578,9 +1447,33 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 21, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/core/utils.py:228: PreviewWarning: The JSON-related API `parse_json` is in preview. Its behavior may\n", + "change in future versions.\n", + " warnings.warn(bfe.format_message(msg), category=bfe.PreviewWarning)\n" + ] + }, + { + "data": { + "text/html": [ + "
0    {\"ExifOffset\":47,\"Make\":\"MyCamera\"}
" + ], + "text/plain": [ + "0 {\"ExifOffset\":47,\"Make\":\"MyCamera\"}\n", + "Name: blob_col, dtype: extension>[pyarrow]" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Create a Multimodal DataFrame from the sample image URIs\n", "exif_image_df = bpd.from_glob_path(\n", @@ -1608,7 +1501,7 @@ "provenance": [] }, "kernelspec": { - "display_name": "venv", + "display_name": "venv (3.13.0)", "language": "python", "name": "python3" }, @@ -1622,7 +1515,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.15" + "version": "3.13.0" } }, "nbformat": 4,