From d98309eea585023dd28e90b7f8c1b7bbe251b5b1 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 8 Apr 2026 20:00:02 +0200 Subject: [PATCH 1/5] GH-15047: [Python]: switch from pytz to zoneinfo by default for string to tzinfo conversion --- .pre-commit-config.yaml | 76 ++++++++++----------- python/pyarrow/src/arrow/python/datetime.cc | 52 +++++--------- python/pyarrow/tests/test_pandas.py | 24 ++++--- python/pyarrow/tests/test_types.py | 49 ++++++------- 4 files changed, 92 insertions(+), 109 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 1a1267d32cf0..2e1d47d27e54 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -21,20 +21,20 @@ # To run all hooks on all files use `pre-commit run -a` repos: - - repo: local - hooks: - - id: rat - name: Release Audit Tool - language: system - entry: | - bash -c " \ - git archive HEAD \ - --prefix=apache-arrow/ \ - --output=apache-arrow.tar.gz && \ - dev/release/run-rat.sh apache-arrow.tar.gz && \ - rm -f apache-arrow.tar.gz" - always_run: true - pass_filenames: false + # - repo: local + # hooks: + # - id: rat + # name: Release Audit Tool + # language: system + # entry: | + # bash -c " \ + # git archive HEAD \ + # --prefix=apache-arrow/ \ + # --output=apache-arrow.tar.gz && \ + # dev/release/run-rat.sh apache-arrow.tar.gz && \ + # rm -f apache-arrow.tar.gz" + # always_run: true + # pass_filenames: false - repo: https://github.com/hadolint/hadolint rev: v2.12.0 hooks: @@ -188,30 +188,30 @@ repos: ?^python/pyarrow/util\.py$| ?^python/pyarrow/vendored/| ) - - repo: local - hooks: - - id: lintr - alias: r - name: R Lint - language: r - additional_dependencies: - - cyclocomp - - lintr - - testthat - entry: | - Rscript -e "Sys.setenv(NOT_CRAN = 'TRUE'); lintr::expect_lint_free('r')" - pass_filenames: false - files: >- - ^r/.*\.(R|Rmd)$ - - repo: local - hooks: - - id: air-format - alias: r - name: R Format (Air) - language: system - entry: air format r --check - files: >- 
- ^r/.*\.R$ + # - repo: local + # hooks: + # - id: lintr + # alias: r + # name: R Lint + # language: r + # additional_dependencies: + # - cyclocomp + # - lintr + # - testthat + # entry: | + # Rscript -e "Sys.setenv(NOT_CRAN = 'TRUE'); lintr::expect_lint_free('r')" + # pass_filenames: false + # files: >- + # ^r/.*\.(R|Rmd)$ + # - repo: local + # hooks: + # - id: air-format + # alias: r + # name: R Format (Air) + # language: system + # entry: air format r --check + # files: >- + # ^r/.*\.R$ - repo: https://github.com/pre-commit/mirrors-clang-format rev: v18.1.8 hooks: diff --git a/python/pyarrow/src/arrow/python/datetime.cc b/python/pyarrow/src/arrow/python/datetime.cc index 1c4e66064d1d..074dbd305bcb 100644 --- a/python/pyarrow/src/arrow/python/datetime.cc +++ b/python/pyarrow/src/arrow/python/datetime.cc @@ -374,39 +374,7 @@ Result StringToTzinfo(const std::string& tz) { OwnedRef zoneinfo; OwnedRef datetime; - if (internal::ImportModule("pytz", &pytz).ok()) { - if (MatchFixedOffset(tz, &sign_str, &hour_str, &minute_str)) { - int sign = -1; - if (sign_str == "+") { - sign = 1; - } - OwnedRef fixed_offset; - RETURN_NOT_OK(internal::ImportFromModule(pytz.obj(), "FixedOffset", &fixed_offset)); - uint32_t minutes, hours; - if (!::arrow::internal::ParseUnsigned(hour_str.data(), hour_str.size(), &hours) || - !::arrow::internal::ParseUnsigned(minute_str.data(), minute_str.size(), - &minutes)) { - return Status::Invalid("Invalid timezone: ", tz); - } - OwnedRef total_minutes(PyLong_FromLong( - sign * ((static_cast(hours) * 60) + static_cast(minutes)))); - RETURN_IF_PYERROR(); - auto tzinfo = - PyObject_CallFunctionObjArgs(fixed_offset.obj(), total_minutes.obj(), NULL); - RETURN_IF_PYERROR(); - return tzinfo; - } - - OwnedRef timezone; - RETURN_NOT_OK(internal::ImportFromModule(pytz.obj(), "timezone", &timezone)); - OwnedRef py_tz_string( - PyUnicode_FromStringAndSize(tz.c_str(), static_cast(tz.size()))); - auto tzinfo = PyObject_CallFunctionObjArgs(timezone.obj(), 
py_tz_string.obj(), NULL); - RETURN_IF_PYERROR(); - return tzinfo; - } - - // catch fixed offset if pytz is not present + // Handle fixed offsets with datetime.timezone, independent of pytz availability. if (MatchFixedOffset(tz, &sign_str, &hour_str, &minute_str)) { RETURN_NOT_OK(internal::ImportModule("datetime", &datetime)); int sign = -1; @@ -447,7 +415,7 @@ Result StringToTzinfo(const std::string& tz) { return tzinfo; } - // fallback on zoneinfo if tz is string and pytz is not present + // Prefer zoneinfo for named timezones when available. if (internal::ImportModule("zoneinfo", &zoneinfo).ok()) { OwnedRef class_zoneinfo; RETURN_NOT_OK( @@ -456,12 +424,26 @@ Result StringToTzinfo(const std::string& tz) { PyUnicode_FromStringAndSize(tz.c_str(), static_cast(tz.size()))); auto tzinfo = PyObject_CallFunctionObjArgs(class_zoneinfo.obj(), py_tz_string.obj(), NULL); + if (tzinfo != nullptr) { + return tzinfo; + } + + // Keep backwards compatibility for named timezones only available in pytz. 
+ PyErr_Clear(); + } + + if (internal::ImportModule("pytz", &pytz).ok()) { + OwnedRef timezone; + RETURN_NOT_OK(internal::ImportFromModule(pytz.obj(), "timezone", &timezone)); + OwnedRef py_tz_string( + PyUnicode_FromStringAndSize(tz.c_str(), static_cast(tz.size()))); + auto tzinfo = PyObject_CallFunctionObjArgs(timezone.obj(), py_tz_string.obj(), NULL); RETURN_IF_PYERROR(); return tzinfo; } return Status::Invalid( - "Pytz package or Python>=3.8 for zoneinfo module must be installed."); + "Python>=3.9 for zoneinfo module or pytz package must be installed."); } Result TzinfoToString(PyObject* tzinfo) { diff --git a/python/pyarrow/tests/test_pandas.py b/python/pyarrow/tests/test_pandas.py index 0339975f4571..f197861b9d11 100644 --- a/python/pyarrow/tests/test_pandas.py +++ b/python/pyarrow/tests/test_pandas.py @@ -21,6 +21,7 @@ import multiprocessing as mp import sys import warnings +import zoneinfo from collections import OrderedDict from datetime import date, datetime, time, timedelta, timezone @@ -1168,10 +1169,20 @@ def test_python_datetime(self): def test_python_datetime_with_pytz_tzinfo(self): pytz = pytest.importorskip("pytz") - for tz in [pytz.utc, pytz.timezone('US/Eastern'), pytz.FixedOffset(1)]: - values = [datetime(2018, 1, 1, 12, 23, 45, tzinfo=tz)] + timezones_pytz = [pytz.utc, pytz.timezone('US/Eastern'), pytz.FixedOffset(1)] + timezones_zoneinfo = [ + zoneinfo.ZoneInfo('UTC'), + zoneinfo.ZoneInfo('US/Eastern'), + timezone(timedelta(minutes=1)) + ] + + for tz, tz_zoneinfo in zip(timezones_pytz, timezones_zoneinfo): + values = [tz.localize(datetime(2018, 1, 1, 12, 23, 45))] df = pd.DataFrame({'datetime': values}) - _check_pandas_roundtrip(df) + df_expected = pd.DataFrame( + {'datetime': [datetime(2018, 1, 1, 12, 23, 45, tzinfo=tz_zoneinfo)]} + ) + _check_pandas_roundtrip(df, expected=df_expected) @h.given(st.none() | past.timezones) @h.settings(deadline=None) @@ -1183,7 +1194,6 @@ def test_python_datetime_with_pytz_timezone(self, tz): 
_check_pandas_roundtrip(df, check_dtype=False) def test_python_datetime_with_timezone_tzinfo(self): - pytz = pytest.importorskip("pytz") from datetime import timezone values = [datetime(2018, 1, 1, 12, 23, 45, tzinfo=timezone.utc)] @@ -1191,15 +1201,11 @@ def test_python_datetime_with_timezone_tzinfo(self): df = pd.DataFrame({'datetime': values}, index=values) _check_pandas_roundtrip(df, preserve_index=True) - # datetime.timezone is going to be pytz.FixedOffset hours = 1 tz_timezone = timezone(timedelta(hours=hours)) - tz_pytz = pytz.FixedOffset(hours * 60) values = [datetime(2018, 1, 1, 12, 23, 45, tzinfo=tz_timezone)] - values_exp = [datetime(2018, 1, 1, 12, 23, 45, tzinfo=tz_pytz)] df = pd.DataFrame({'datetime': values}, index=values) - df_exp = pd.DataFrame({'datetime': values_exp}, index=values_exp) - _check_pandas_roundtrip(df, expected=df_exp, preserve_index=True) + _check_pandas_roundtrip(df, preserve_index=True) def test_python_datetime_subclass(self): diff --git a/python/pyarrow/tests/test_types.py b/python/pyarrow/tests/test_types.py index 539f01724542..3d260fee24c3 100644 --- a/python/pyarrow/tests/test_types.py +++ b/python/pyarrow/tests/test_types.py @@ -20,6 +20,7 @@ from functools import partial import datetime import sys +import zoneinfo import pytest import hypothesis as h @@ -491,35 +492,29 @@ def utcoffset(self, dt): def test_string_to_tzinfo(): string = ['UTC', 'Europe/Paris', '+03:00', '+01:30', '-02:00'] - try: - import pytz - expected = [pytz.utc, pytz.timezone('Europe/Paris'), - pytz.FixedOffset(180), pytz.FixedOffset(90), - pytz.FixedOffset(-120)] - result = [pa.lib.string_to_tzinfo(i) for i in string] - assert result == expected - - except ImportError: - try: - import zoneinfo - expected = [zoneinfo.ZoneInfo(key='UTC'), - zoneinfo.ZoneInfo(key='Europe/Paris'), - datetime.timezone(datetime.timedelta(hours=3)), - datetime.timezone( - datetime.timedelta(hours=1, minutes=30)), - datetime.timezone(-datetime.timedelta(hours=2))] - result = 
[pa.lib.string_to_tzinfo(i) for i in string] - assert result == expected - - except ImportError: - pytest.skip('requires pytz or zoneinfo to be installed') - - -def test_timezone_string_roundtrip_pytz(): + result = [pa.lib.string_to_tzinfo(i) for i in string] + expected = [ + zoneinfo.ZoneInfo('UTC'), + zoneinfo.ZoneInfo('Europe/Paris'), + datetime.timezone(datetime.timedelta(hours=3)), + datetime.timezone(datetime.timedelta(hours=1, minutes=30)), + datetime.timezone(-datetime.timedelta(hours=2)), + ] + assert result == expected + + +def test_string_to_tzinfo_pytz_fallback(): pytz = pytest.importorskip("pytz") + result = pa.lib.string_to_tzinfo("europe/brussels") + expected = pytz.timezone("Europe/Brussels") + assert result == expected + - tz = [pytz.FixedOffset(90), pytz.FixedOffset(-90), - pytz.utc, pytz.timezone('America/New_York')] +def test_timezone_string_roundtrip(): + tz = [datetime.timezone(datetime.timedelta(hours=1, minutes=30)), + datetime.timezone(datetime.timedelta(hours=-1, minutes=-30)), + zoneinfo.ZoneInfo('UTC'), + zoneinfo.ZoneInfo('America/New_York')] name = ['+01:30', '-01:30', 'UTC', 'America/New_York'] assert [pa.lib.tzinfo_to_string(i) for i in tz] == name From 443ed5b2f7c3a85104eb2df2808884ecfea9f193 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 8 Apr 2026 21:19:23 +0200 Subject: [PATCH 2/5] add compat for older pandas; only prefer zoneinfo for pandas 3+ --- python/pyarrow/includes/libarrow_python.pxd | 2 +- python/pyarrow/pandas_compat.py | 7 +++-- python/pyarrow/src/arrow/python/datetime.cc | 35 ++++++++++++++++++++- python/pyarrow/src/arrow/python/datetime.h | 2 +- python/pyarrow/tests/test_types.py | 8 +++++ python/pyarrow/types.pxi | 12 +++++-- 6 files changed, 57 insertions(+), 9 deletions(-) diff --git a/python/pyarrow/includes/libarrow_python.pxd b/python/pyarrow/includes/libarrow_python.pxd index 72c278d3e74b..385a2924d1da 100644 --- a/python/pyarrow/includes/libarrow_python.pxd +++ 
b/python/pyarrow/includes/libarrow_python.pxd @@ -213,7 +213,7 @@ cdef extern from "arrow/python/api.h" namespace "arrow::py::internal" nogil: CTimePoint TimePoint_from_ns(int64_t val) CResult[c_string] TzinfoToString(PyObject* pytzinfo) - CResult[PyObject*] StringToTzinfo(c_string) + CResult[PyObject*] StringToTzinfo(c_string, c_bool) cdef extern from "arrow/python/numpy_init.h" namespace "arrow::py": diff --git a/python/pyarrow/pandas_compat.py b/python/pyarrow/pandas_compat.py index dfca59cbf5f9..d27a95b9f9d8 100644 --- a/python/pyarrow/pandas_compat.py +++ b/python/pyarrow/pandas_compat.py @@ -787,7 +787,7 @@ def _reconstruct_block(item, columns=None, extension_columns=None, return_block= def make_datetimetz(unit, tz): if _pandas_api.is_v1(): unit = 'ns' # ARROW-3789: Coerce date/timestamp types to datetime64[ns] - tz = pa.lib.string_to_tzinfo(tz) + tz = pa.lib.string_to_tzinfo(tz, prefer_zoneinfo=_pandas_api.is_ge_v3()) return _pandas_api.datetimetz_type(unit, tz=tz) @@ -1183,7 +1183,8 @@ def _reconstruct_columns_from_metadata(columns, column_indexes): # ARROW-13756: if index is timezone aware DataTimeIndex elif pandas_dtype == "datetimetz": tz = pa.lib.string_to_tzinfo( - column_indexes[0]['metadata']['timezone']) + column_indexes[0]['metadata']['timezone'], + prefer_zoneinfo=_pandas_api.is_ge_v3()) level = pd.to_datetime(level, utc=True).tz_convert(tz) if _pandas_api.is_ge_v3(): # with pandas 3+, to_datetime returns a unit depending on the string @@ -1289,7 +1290,7 @@ def make_tz_aware(series, tz): """ Make a datetime64 Series timezone-aware for the given tz """ - tz = pa.lib.string_to_tzinfo(tz) + tz = pa.lib.string_to_tzinfo(tz, prefer_zoneinfo=_pandas_api.is_ge_v3()) series = (series.dt.tz_localize('utc') .dt.tz_convert(tz)) return series diff --git a/python/pyarrow/src/arrow/python/datetime.cc b/python/pyarrow/src/arrow/python/datetime.cc index 074dbd305bcb..fe0349c9453a 100644 --- a/python/pyarrow/src/arrow/python/datetime.cc +++ 
b/python/pyarrow/src/arrow/python/datetime.cc @@ -368,12 +368,45 @@ Result<std::string> PyTZInfo_utcoffset_hhmm(PyObject* pytzinfo) { // Converted from python. See https://github.com/apache/arrow/pull/7604 // for details. -Result<PyObject*> StringToTzinfo(const std::string& tz) { +Result<PyObject*> StringToTzinfo(const std::string& tz, bool prefer_zoneinfo) { std::string_view sign_str, hour_str, minute_str; OwnedRef pytz; OwnedRef zoneinfo; OwnedRef datetime; + // Legacy behavior: prefer pytz objects when available. + if (!prefer_zoneinfo && internal::ImportModule("pytz", &pytz).ok()) { + if (MatchFixedOffset(tz, &sign_str, &hour_str, &minute_str)) { + int sign = -1; + if (sign_str == "+") { + sign = 1; + } + OwnedRef fixed_offset; + RETURN_NOT_OK(internal::ImportFromModule(pytz.obj(), "FixedOffset", &fixed_offset)); + uint32_t minutes, hours; + if (!::arrow::internal::ParseUnsigned(hour_str.data(), hour_str.size(), &hours) || + !::arrow::internal::ParseUnsigned(minute_str.data(), minute_str.size(), + &minutes)) { + return Status::Invalid("Invalid timezone: ", tz); + } + OwnedRef total_minutes(PyLong_FromLong( + sign * ((static_cast<int>(hours) * 60) + static_cast<int>(minutes)))); + RETURN_IF_PYERROR(); + auto tzinfo = + PyObject_CallFunctionObjArgs(fixed_offset.obj(), total_minutes.obj(), NULL); + RETURN_IF_PYERROR(); + return tzinfo; + } + + OwnedRef timezone; + RETURN_NOT_OK(internal::ImportFromModule(pytz.obj(), "timezone", &timezone)); + OwnedRef py_tz_string( + PyUnicode_FromStringAndSize(tz.c_str(), static_cast<Py_ssize_t>(tz.size()))); + auto tzinfo = PyObject_CallFunctionObjArgs(timezone.obj(), py_tz_string.obj(), NULL); + RETURN_IF_PYERROR(); + return tzinfo; + } + // Handle fixed offsets with datetime.timezone, independent of pytz availability. 
if (MatchFixedOffset(tz, &sign_str, &hour_str, &minute_str)) { RETURN_NOT_OK(internal::ImportModule("datetime", &datetime)); diff --git a/python/pyarrow/src/arrow/python/datetime.h b/python/pyarrow/src/arrow/python/datetime.h index 9b21eeb43421..84f46fe2d19c 100644 --- a/python/pyarrow/src/arrow/python/datetime.h +++ b/python/pyarrow/src/arrow/python/datetime.h @@ -188,7 +188,7 @@ Result<int64_t> PyDateTime_utcoffset_s(PyObject* pydatetime); /// * An absolute time zone offset of the form +XX:XX or -XX:XX, such as +07:30 /// GIL must be held when calling this method. ARROW_PYTHON_EXPORT -Result<PyObject*> StringToTzinfo(const std::string& tz); +Result<PyObject*> StringToTzinfo(const std::string& tz, bool prefer_zoneinfo = true); /// \brief Convert a time zone object to a string representation. /// diff --git a/python/pyarrow/tests/test_types.py b/python/pyarrow/tests/test_types.py index 3d260fee24c3..bf3ef650c750 100644 --- a/python/pyarrow/tests/test_types.py +++ b/python/pyarrow/tests/test_types.py @@ -503,6 +503,14 @@ def test_string_to_tzinfo(): assert result == expected +def test_string_to_tzinfo_prefer_zoneinfo_false(): + pytz = pytest.importorskip("pytz") + result = pa.lib.string_to_tzinfo("Europe/Brussels", prefer_zoneinfo=False) + assert result == pytz.timezone("Europe/Brussels") + result = pa.lib.string_to_tzinfo("+01:30", prefer_zoneinfo=False) + assert result == pytz.FixedOffset(90) + + def test_string_to_tzinfo_pytz_fallback(): pytz = pytest.importorskip("pytz") result = pa.lib.string_to_tzinfo("europe/brussels") diff --git a/python/pyarrow/types.pxi b/python/pyarrow/types.pxi index e9eef8965153..ec1a5a2ba9a3 100644 --- a/python/pyarrow/types.pxi +++ b/python/pyarrow/types.pxi @@ -4166,7 +4166,7 @@ def tzinfo_to_string(tz): return frombytes(GetResultValue(TzinfoToString(tz))) -def string_to_tzinfo(name): +def string_to_tzinfo(name, *, prefer_zoneinfo=True): """ Convert a time zone name into a time zone object. 
@@ -4177,15 +4177,21 @@ def string_to_tzinfo(name): Parameters ---------- - name: str + name: str Time zone name. + prefer_zoneinfo : bool, default True + If True, resolve named timezones using ``zoneinfo`` first and only + fall back to ``pytz`` when needed. If False, prefer ``pytz`` when it + is available. Returns ------- tz : datetime.tzinfo Time zone object """ - cdef PyObject* tz = GetResultValue(StringToTzinfo(name.encode('utf-8'))) + cdef PyObject* tz = GetResultValue( + StringToTzinfo(name.encode('utf-8'), prefer_zoneinfo) + ) return PyObject_to_object(tz) From 2d9a98209dc27664f43e1a496282d1765099dfb8 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 8 Apr 2026 21:20:42 +0200 Subject: [PATCH 3/5] undo pre-commit changes --- .pre-commit-config.yaml | 76 ++++++++++++++++++++--------------------- 1 file changed, 38 insertions(+), 38 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 2e1d47d27e54..1a1267d32cf0 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -21,20 +21,20 @@ # To run all hooks on all files use `pre-commit run -a` repos: - # - repo: local - # hooks: - # - id: rat - # name: Release Audit Tool - # language: system - # entry: | - # bash -c " \ - # git archive HEAD \ - # --prefix=apache-arrow/ \ - # --output=apache-arrow.tar.gz && \ - # dev/release/run-rat.sh apache-arrow.tar.gz && \ - # rm -f apache-arrow.tar.gz" - # always_run: true - # pass_filenames: false + - repo: local + hooks: + - id: rat + name: Release Audit Tool + language: system + entry: | + bash -c " \ + git archive HEAD \ + --prefix=apache-arrow/ \ + --output=apache-arrow.tar.gz && \ + dev/release/run-rat.sh apache-arrow.tar.gz && \ + rm -f apache-arrow.tar.gz" + always_run: true + pass_filenames: false - repo: https://github.com/hadolint/hadolint rev: v2.12.0 hooks: @@ -188,30 +188,30 @@ repos: ?^python/pyarrow/util\.py$| ?^python/pyarrow/vendored/| ) - # - repo: local - # hooks: - # - id: lintr - # alias: r - # name: R 
Lint - # language: r - # additional_dependencies: - # - cyclocomp - # - lintr - # - testthat - # entry: | - # Rscript -e "Sys.setenv(NOT_CRAN = 'TRUE'); lintr::expect_lint_free('r')" - # pass_filenames: false - # files: >- - # ^r/.*\.(R|Rmd)$ - # - repo: local - # hooks: - # - id: air-format - # alias: r - # name: R Format (Air) - # language: system - # entry: air format r --check - # files: >- - # ^r/.*\.R$ + - repo: local + hooks: + - id: lintr + alias: r + name: R Lint + language: r + additional_dependencies: + - cyclocomp + - lintr + - testthat + entry: | + Rscript -e "Sys.setenv(NOT_CRAN = 'TRUE'); lintr::expect_lint_free('r')" + pass_filenames: false + files: >- + ^r/.*\.(R|Rmd)$ + - repo: local + hooks: + - id: air-format + alias: r + name: R Format (Air) + language: system + entry: air format r --check + files: >- + ^r/.*\.R$ - repo: https://github.com/pre-commit/mirrors-clang-format rev: v18.1.8 hooks: From dc184fe090a2f510cfc86b17af46a95a304d2593 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Thu, 9 Apr 2026 10:52:03 +0200 Subject: [PATCH 4/5] test fixes (macos, old pandas) --- python/pyarrow/src/arrow/python/datetime.cc | 11 +++++------ python/pyarrow/tests/test_pandas.py | 20 ++++++++++++++++---- python/pyarrow/tests/test_types.py | 3 +++ 3 files changed, 24 insertions(+), 10 deletions(-) diff --git a/python/pyarrow/src/arrow/python/datetime.cc b/python/pyarrow/src/arrow/python/datetime.cc index fe0349c9453a..6a835c2d37ce 100644 --- a/python/pyarrow/src/arrow/python/datetime.cc +++ b/python/pyarrow/src/arrow/python/datetime.cc @@ -374,7 +374,7 @@ Result StringToTzinfo(const std::string& tz, bool prefer_zoneinfo) { OwnedRef zoneinfo; OwnedRef datetime; - // Legacy behavior: prefer pytz objects when available. 
+ // Legacy behavior: prefer pytz objects when available if (!prefer_zoneinfo && internal::ImportModule("pytz", &pytz).ok()) { if (MatchFixedOffset(tz, &sign_str, &hour_str, &minute_str)) { int sign = -1; @@ -407,7 +407,7 @@ Result StringToTzinfo(const std::string& tz, bool prefer_zoneinfo) { return tzinfo; } - // Handle fixed offsets with datetime.timezone, independent of pytz availability. + // Handle fixed offsets with datetime.timezone if (MatchFixedOffset(tz, &sign_str, &hour_str, &minute_str)) { RETURN_NOT_OK(internal::ImportModule("datetime", &datetime)); int sign = -1; @@ -448,7 +448,7 @@ Result StringToTzinfo(const std::string& tz, bool prefer_zoneinfo) { return tzinfo; } - // Prefer zoneinfo for named timezones when available. + // Use zoneinfo for named timezones when available if (internal::ImportModule("zoneinfo", &zoneinfo).ok()) { OwnedRef class_zoneinfo; RETURN_NOT_OK( @@ -461,7 +461,7 @@ Result StringToTzinfo(const std::string& tz, bool prefer_zoneinfo) { return tzinfo; } - // Keep backwards compatibility for named timezones only available in pytz. 
+ // Keep backwards compatibility for named timezones only available in pytz PyErr_Clear(); } @@ -475,8 +475,7 @@ Result StringToTzinfo(const std::string& tz, bool prefer_zoneinfo) { return tzinfo; } - return Status::Invalid( - "Python>=3.9 for zoneinfo module or pytz package must be installed."); + return Status::Invalid("The zoneinfo module or pytz package must be installed."); } Result TzinfoToString(PyObject* tzinfo) { diff --git a/python/pyarrow/tests/test_pandas.py b/python/pyarrow/tests/test_pandas.py index f197861b9d11..07f214fd9b8e 100644 --- a/python/pyarrow/tests/test_pandas.py +++ b/python/pyarrow/tests/test_pandas.py @@ -1179,9 +1179,12 @@ def test_python_datetime_with_pytz_tzinfo(self): for tz, tz_zoneinfo in zip(timezones_pytz, timezones_zoneinfo): values = [tz.localize(datetime(2018, 1, 1, 12, 23, 45))] df = pd.DataFrame({'datetime': values}) - df_expected = pd.DataFrame( - {'datetime': [datetime(2018, 1, 1, 12, 23, 45, tzinfo=tz_zoneinfo)]} - ) + if Version(pd.__version__) >= Version("3.0.0"): + df_expected = pd.DataFrame( + {'datetime': [datetime(2018, 1, 1, 12, 23, 45, tzinfo=tz_zoneinfo)]} + ) + else: + df_expected = None _check_pandas_roundtrip(df, expected=df_expected) @h.given(st.none() | past.timezones) @@ -1205,7 +1208,16 @@ def test_python_datetime_with_timezone_tzinfo(self): tz_timezone = timezone(timedelta(hours=hours)) values = [datetime(2018, 1, 1, 12, 23, 45, tzinfo=tz_timezone)] df = pd.DataFrame({'datetime': values}, index=values) - _check_pandas_roundtrip(df, preserve_index=True) + if Version(pd.__version__) < Version("3.0.0"): + # datetime.timezone is going to be pytz.FixedOffset + pytz = pytest.importorskip("pytz") + tz_pytz = pytz.FixedOffset(hours * 60) + values_exp = [datetime(2018, 1, 1, 12, 23, 45, tzinfo=tz_pytz)] + df_exp = pd.DataFrame({'datetime': values_exp}, index=values_exp) + else: + df_exp = None + + _check_pandas_roundtrip(df, expected=df_exp, preserve_index=True) def test_python_datetime_subclass(self): diff --git 
a/python/pyarrow/tests/test_types.py b/python/pyarrow/tests/test_types.py index bf3ef650c750..4a33d79223d0 100644 --- a/python/pyarrow/tests/test_types.py +++ b/python/pyarrow/tests/test_types.py @@ -511,6 +511,9 @@ def test_string_to_tzinfo_prefer_zoneinfo_false(): assert result == pytz.FixedOffset(90) +@pytest.mark.skipif( + sys.platform == 'darwin', reason="macOS supports those lower-case names" +) def test_string_to_tzinfo_pytz_fallback(): pytz = pytest.importorskip("pytz") result = pa.lib.string_to_tzinfo("europe/brussels") From c2c973aca5cebdff8bffc96afdcbb7b1ef7894b1 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Thu, 9 Apr 2026 13:48:16 +0200 Subject: [PATCH 5/5] also use correct tz class in TimestampScalar.as_py if it returns pandas object --- python/pyarrow/scalar.pxi | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/python/pyarrow/scalar.pxi b/python/pyarrow/scalar.pxi index a6377b2bb707..636787f916d5 100644 --- a/python/pyarrow/scalar.pxi +++ b/python/pyarrow/scalar.pxi @@ -822,7 +822,14 @@ cdef class TimestampScalar(Scalar): return None if not dtype.timezone().empty(): - tzinfo = string_to_tzinfo(frombytes(dtype.timezone())) + prefer_zoneinfo = True + # only when this method would return a pandas.Timestamp, prefer + # zoneinfo depending on the pandas version + if _pandas_api.have_pandas and dtype.unit() == TimeUnit_NANO: + prefer_zoneinfo = _pandas_api.is_ge_v3() + tzinfo = string_to_tzinfo( + frombytes(dtype.timezone()), prefer_zoneinfo=prefer_zoneinfo + ) else: tzinfo = None