Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 21 additions & 12 deletions pyiceberg/catalog/hive.py
Original file line number Diff line number Diff line change
Expand Up @@ -551,23 +551,32 @@ def commit_table(

if hive_table and current_table:
# Table exists, update it.
new_parameters = _construct_parameters(

# Note on table properties:
# - Iceberg table properties are stored in both HMS and Iceberg metadata JSON.
# - Updates are reflected in both locations
# - Existing HMS table properties (set by external systems like Hive/Spark) are preserved.
#
# While it is possible to modify HMS table properties through this API, it is not recommended:
# - Mixing HMS-specific properties in Iceberg metadata can cause confusion
# - New/updated HMS table properties will also be stored in Iceberg metadata (even though it is HMS-specific)
# - HMS-native properties (set outside Iceberg) cannot be deleted since they are not visible to Iceberg
# (However, if you first SET an HMS property via Iceberg, it becomes tracked in Iceberg metadata,
# and can then be deleted via Iceberg - which removes it from both Iceberg metadata and HMS)
new_iceberg_properties = _construct_parameters(
metadata_location=updated_staged_table.metadata_location,
previous_metadata_location=current_table.metadata_location,
metadata_properties=updated_staged_table.properties,
)

# Detect properties that were removed from Iceberg metadata
removed_keys = current_table.properties.keys() - updated_staged_table.properties.keys()

# Sync HMS parameters: Iceberg metadata is the source of truth, HMS parameters are
# a projection of Iceberg state plus any HMS-only properties.
# Start with existing HMS params, remove deleted Iceberg properties, then apply Iceberg values.
merged_params = dict(hive_table.parameters or {})
for key in removed_keys:
merged_params.pop(key, None)
merged_params.update(new_parameters)
hive_table.parameters = merged_params
deleted_iceberg_properties = current_table.properties.keys() - updated_staged_table.properties.keys()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: this is non blocking but seems like this logic is technically still "constructing parameters". wdyt of moving this into constructing parameters and adding these comments as a python docstring?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

do you mean move this to the _construct_parameters function? _construct_parameters is technically "construct iceberg parameters" since its only dealing with iceberg table properties

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see it's just a matter of parameters and properties.


# Merge: preserve HMS-native properties, remove deleted Iceberg properties, apply new Iceberg properties
existing_hms_parameters = dict(hive_table.parameters or {})
for key in deleted_iceberg_properties:
existing_hms_parameters.pop(key, None)
existing_hms_parameters.update(new_iceberg_properties)
hive_table.parameters = existing_hms_parameters

# Update hive's schema and properties
hive_table.sd = _construct_hive_storage_descriptor(
Expand Down
60 changes: 60 additions & 0 deletions tests/integration/test_reads.py
Original file line number Diff line number Diff line change
Expand Up @@ -286,6 +286,66 @@ def test_hive_critical_properties_always_from_iceberg(catalog: Catalog) -> None:
assert new_metadata_location != original_metadata_location, "metadata_location should be updated!"


@pytest.mark.integration
@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive")])
def test_hive_native_properties_cannot_be_deleted_via_iceberg(catalog: Catalog) -> None:
"""Test that HMS-native properties (set outside Iceberg) cannot be deleted via Iceberg.

HMS-native properties are not visible to Iceberg, so remove_properties fails with KeyError.
However, if you first SET an HMS property via Iceberg (making it tracked in Iceberg metadata),
it can then be deleted via Iceberg.
"""
table = create_table(catalog)
hive_client: _HiveClient = _HiveClient(catalog.properties["uri"])

# Set an HMS-native property directly (not through Iceberg)
with hive_client as open_client:
hive_table = open_client.get_table(*TABLE_NAME)
hive_table.parameters["hms_native_prop"] = "native_value"
open_client.alter_table(TABLE_NAME[0], TABLE_NAME[1], hive_table)

# Verify the HMS-native property exists in HMS
with hive_client as open_client:
hive_table = open_client.get_table(*TABLE_NAME)
assert hive_table.parameters.get("hms_native_prop") == "native_value"

# Refresh the Iceberg table to get the latest state
table.refresh()

# Verify the HMS-native property is NOT visible in Iceberg
assert "hms_native_prop" not in table.properties

# Attempt to remove the HMS-native property via Iceberg - this should fail
# because the property is not tracked in Iceberg metadata (not visible to Iceberg)
with pytest.raises(KeyError):
table.transaction().remove_properties("hms_native_prop").commit_transaction()

# HMS-native property should still exist (cannot be deleted via Iceberg)
with hive_client as open_client:
hive_table = open_client.get_table(*TABLE_NAME)
assert hive_table.parameters.get("hms_native_prop") == "native_value", (
"HMS-native property should still exist since Iceberg removal failed!"
)

# Now SET the same property via Iceberg (this makes it tracked in Iceberg metadata)
table.transaction().set_properties({"hms_native_prop": "iceberg_value"}).commit_transaction()

# Verify it's updated in both places
with hive_client as open_client:
hive_table = open_client.get_table(*TABLE_NAME)
assert hive_table.parameters.get("hms_native_prop") == "iceberg_value"

# Now we CAN delete it via Iceberg (because it's now tracked in Iceberg metadata)
table.transaction().remove_properties("hms_native_prop").commit_transaction()

# Property should be deleted from HMS
with hive_client as open_client:
hive_table = open_client.get_table(*TABLE_NAME)
assert hive_table.parameters.get("hms_native_prop") is None, (
"Property should be deletable after being SET via Iceberg (making it tracked)!"
)


@pytest.mark.integration
@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")])
def test_table_properties_dict(catalog: Catalog) -> None:
Expand Down