Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions babel/dates.py
Original file line number Diff line number Diff line change
Expand Up @@ -649,7 +649,9 @@ def get_timezone_name(
info = locale.time_zones.get(zone, {})
# Try explicitly translated zone names first
if width in info and zone_variant in info[width]:
return info[width][zone_variant]
value = info[width][zone_variant]
if value != NO_INHERITANCE_MARKER:
return value

metazone = get_global('meta_zones').get(zone)
if metazone:
Expand All @@ -660,7 +662,7 @@ def get_timezone_name(
# If the short form is marked no-inheritance,
# try to fall back to the long name instead.
name = metazone_info.get('long', {}).get(zone_variant)
if name:
if name and name != NO_INHERITANCE_MARKER:
return name

# If we have a concrete datetime, we assume that the result can't be
Expand Down
1 change: 1 addition & 0 deletions misc/icu4c-tools/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
bin/
3 changes: 3 additions & 0 deletions misc/icu4c-tools/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
bin/icu4c_date_format: icu4c_date_format.cpp
mkdir -p bin
$(CXX) -Wall -std=c++17 -o $@ $^ $(shell pkg-config --cflags --libs icu-uc icu-i18n)
22 changes: 22 additions & 0 deletions misc/icu4c-tools/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# icu4c-tools

Some haphazard tools for cross-checking results between ICU4C and Babel.
These are not meant to be production-ready or e.g. guaranteed to not leak memory in any way.

## icu4c_date_format

### Compiling

This worked on my macOS – on a Linux machine, you shouldn't need the `PKG_CONFIG_PATH` environment variable.

```
env PKG_CONFIG_PATH="/opt/homebrew/opt/icu4c@76/lib/pkgconfig" make bin/icu4c_date_format
```

### Running

E.g.

```
env TEST_TIMEZONES=Pacific/Honolulu TEST_LOCALES=en_US,en,en_GB TEST_TIME_FORMAT="YYYY-MM-dd H:mm zz" bin/icu4c_date_format
```
101 changes: 101 additions & 0 deletions misc/icu4c-tools/icu4c_date_format.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
#include <iostream>
#include <sstream>
#include <unicode/smpdtfmt.h>
#include <unicode/timezone.h>

static std::vector<std::string> split(const std::string &s, char delimiter) {
std::vector<std::string> tokens;
std::string token;
std::istringstream tokenStream(s);
while (std::getline(tokenStream, token, delimiter)) {
tokens.push_back(token);
}
return tokens;
}

static UDate parse_time_str(const char *time_str) {
UErrorCode status = U_ZERO_ERROR;
icu::UnicodeString fauxISO8601("yyyy-MM-dd'T'hh:mm:ss'Z'");
auto fmt = new icu::SimpleDateFormat(fauxISO8601, status);
fmt->setTimeZone(*icu::TimeZone::getGMT());
UDate date = fmt->parse(icu::UnicodeString(time_str), status);
if (U_FAILURE(status)) {
std::cerr << "Failed to parse time string: " << time_str << std::endl;
exit(1);
}
return date;
}

static std::vector<icu::Locale> parse_locales(const char *locales_str) {
auto locales = std::vector<icu::Locale>{};
for (auto token : split(locales_str, ',')) {
auto loc = icu::Locale(token.c_str());
if (loc.isBogus()) {
std::cerr << "Invalid locale: " << token << std::endl;
exit(1);
}
locales.push_back(loc);
}
return locales;
}

static std::vector<icu::TimeZone *> parse_timezones(const char *timezones_str) {
auto timezones = std::vector<icu::TimeZone *>{};
for (auto token : split(timezones_str, ',')) {
auto tz = icu::TimeZone::createTimeZone(token.c_str());
if (tz == nullptr) {
std::cerr << "Invalid timezone: " << token << std::endl;
exit(1);
}
timezones.push_back(tz);
}
return timezones;
}

int main() {
UErrorCode status = U_ZERO_ERROR;
const char *timezones_str = getenv("TEST_TIMEZONES");
const char *locales_str = getenv("TEST_LOCALES");
const char *time_str = getenv("TEST_TIME");
const char *time_format_str = getenv("TEST_TIME_FORMAT");

if (!timezones_str || !locales_str) {
std::cerr << "Please set TEST_TIMEZONES, TEST_LOCALES environment variables"
<< std::endl;
return 1;
}

if (time_str == nullptr) {
time_str = "2025-03-04T13:53:00Z";
std::cerr << "Defaulting TEST_TIME to " << time_str << std::endl;
}

if (time_format_str == nullptr) {
time_format_str = "z:zz:zzz:zzzz";
std::cerr << "Defaulting TEST_TIME_FORMAT to " << time_format_str
<< std::endl;
}

auto date = parse_time_str(time_str);
auto timezones = parse_timezones(timezones_str);
auto locales = parse_locales(locales_str);

for (auto tz : timezones) {
icu::UnicodeString tzid;
tz->getID(tzid);
std::string tzid_str;
tzid.toUTF8String(tzid_str);
for (auto loc : locales) {
auto fmt = new icu::SimpleDateFormat(time_format_str, loc, status);
fmt->setTimeZone(*tz);
icu::UnicodeString name;
fmt->format(date, name);
std::string result;
name.toUTF8String(result);
std::cout << tzid_str << "\t" << loc.getName() << "\t" << result
<< std::endl;
delete fmt;
}
}
return 0;
}
30 changes: 30 additions & 0 deletions tests/test_dates.py
Original file line number Diff line number Diff line change
Expand Up @@ -1187,3 +1187,33 @@ def test_issue_1089():
def test_issue_1162(locale, format, negative, expected):
delta = timedelta(seconds=10800) * (-1 if negative else +1)
assert dates.format_timedelta(delta, add_direction=True, format=format, locale=locale) == expected


def test_issue_1192():
# The actual returned value here is not actually strictly specified ("get_timezone_name"
# is not an operation specified as such). Issue #1192 concerned this invocation returning
# the invalid "no inheritance marker" value; _that_ should never be returned here.
# IOW, if the below "Hawaii-Aleutian Time" changes with e.g. CLDR updates, that's fine.
assert dates.get_timezone_name('Pacific/Honolulu', 'short', locale='en_GB') == "Hawaii-Aleutian Time"


@pytest.mark.xfail
def test_issue_1192_fmt(timezone_getter):
"""
There is an issue in how we format the fallback for z/zz in the absence of data
(esp. with the no inheritance marker present).
This test is marked xfail until that's fixed.
"""
# env TEST_TIMEZONES=Pacific/Honolulu TEST_LOCALES=en_US,en_GB TEST_TIME_FORMAT="YYYY-MM-dd H:mm z" bin/icu4c_date_format
# Defaulting TEST_TIME to 2025-03-04T13:53:00Z
# Pacific/Honolulu en_US 2025-03-04 3:53 HST
# Pacific/Honolulu en_GB 2025-03-04 3:53 GMT-10
# env TEST_TIMEZONES=Pacific/Honolulu TEST_LOCALES=en_US,en_GB TEST_TIME_FORMAT="YYYY-MM-dd H:mm zz" bin/icu4c_date_format
# Pacific/Honolulu en_US 2025-03-04 3:53 HST
# Pacific/Honolulu en_GB 2025-03-04 3:53 GMT-10
tz = timezone_getter("Pacific/Honolulu")
dt = _localize(tz, datetime(2025, 3, 4, 13, 53, tzinfo=UTC))
assert dates.format_datetime(dt, "YYYY-MM-dd H:mm z", locale="en_US") == "2025-03-04 3:53 HST"
assert dates.format_datetime(dt, "YYYY-MM-dd H:mm z", locale="en_GB") == "2025-03-04 3:53 GMT-10"
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I did some digging and we get Hawaii-Aleutian Standard Time instead of GMT-10 here because of this fallback to the long tz name:

babel/babel/dates.py

Lines 659 to 662 in 2b93a4a

if width == 'short' and name == NO_INHERITANCE_MARKER:
# If the short form is marked no-inheritance,
# try to fall back to the long name instead.
name = metazone_info.get('long', {}).get(zone_variant)

When I get rid of it, get_timezone_gmt is used instead.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah yeah, I should've mentioned I also found the culprit, but it's out of scope for this PR 😅

assert dates.format_datetime(dt, "YYYY-MM-dd H:mm zz", locale="en_US") == "2025-03-04 3:53 HST"
assert dates.format_datetime(dt, "YYYY-MM-dd H:mm zz", locale="en_GB") == "2025-03-04 3:53 GMT-10"
Loading