diff --git a/pyiceberg/table/__init__.py b/pyiceberg/table/__init__.py index 88a7bd00c2..1db0b89e8c 100644 --- a/pyiceberg/table/__init__.py +++ b/pyiceberg/table/__init__.py @@ -1111,6 +1111,7 @@ def refresh(self) -> Table: An updated instance of the same Iceberg table """ fresh = self.catalog.load_table(self._identifier) + self._check_uuid(self.metadata, fresh.metadata) self.metadata = fresh.metadata self.io = fresh.io self.metadata_location = fresh.metadata_location @@ -1491,9 +1492,22 @@ def refs(self) -> dict[str, SnapshotRef]: """Return the snapshot references in the table.""" return self.metadata.refs + @staticmethod + def _check_uuid(current_metadata: TableMetadata, new_metadata: TableMetadata) -> None: + """Validate that the table UUID matches after refresh.""" + current = current_metadata.table_uuid + refreshed = new_metadata.table_uuid + + if current != refreshed: + raise ValueError(f"Table UUID does not match: current={current} != refreshed={refreshed}") + def _do_commit(self, updates: tuple[TableUpdate, ...], requirements: tuple[TableRequirement, ...]) -> None: response = self.catalog.commit_table(self, requirements, updates) + # Check UUID to detect table replacement (matches Java's RESTTableOperations.updateCurrentMetadata) + if not isinstance(self, StagedTable): + self._check_uuid(self.metadata, response.metadata) + # https://github.com/apache/iceberg/blob/f6faa58/core/src/main/java/org/apache/iceberg/CatalogUtil.java#L527 # delete old metadata if METADATA_DELETE_AFTER_COMMIT_ENABLED is set to true and uses # TableProperties.METADATA_PREVIOUS_VERSIONS_MAX to determine how many previous versions to keep - diff --git a/tests/catalog/test_rest.py b/tests/catalog/test_rest.py index 5aae59497b..3dc1b5dfae 100644 --- a/tests/catalog/test_rest.py +++ b/tests/catalog/test_rest.py @@ -48,6 +48,7 @@ from pyiceberg.table.sorting import SortField, SortOrder from pyiceberg.transforms import IdentityTransform, TruncateTransform from pyiceberg.typedef import RecursiveDict +from pyiceberg.types import StringType from pyiceberg.utils.config import Config TEST_URI = "https://iceberg-test-catalog/" @@ -2155,3 +2156,87 @@ def test_view_endpoints_enabled_with_config(self, requests_mock: Mocker) -> None # View endpoints should be supported when enabled catalog._check_endpoint(Capability.V1_LIST_VIEWS) catalog._check_endpoint(Capability.V1_DELETE_VIEW) + + +def test_table_uuid_check_on_commit(rest_mock: Mocker, example_table_metadata_v2: dict[str, Any]) -> None: + """Test that UUID mismatch is detected on commit response (matches Java RESTTableOperations behavior).""" + original_uuid = "9c12d441-03fe-4693-9a96-a0705ddf69c1" + different_uuid = "550e8400-e29b-41d4-a716-446655440000" + metadata_location = "s3://warehouse/database/table/metadata.json" + + rest_mock.get( + f"{TEST_URI}v1/namespaces/namespace/tables/table_name", + json={ + "metadata-location": metadata_location, + "metadata": example_table_metadata_v2, + "config": {}, + }, + status_code=200, + request_headers=TEST_HEADERS, + ) + + catalog = RestCatalog("rest", uri=TEST_URI, token=TEST_TOKEN) + table = catalog.load_table(("namespace", "table_name")) + + assert str(table.metadata.table_uuid) == original_uuid + + metadata_with_different_uuid = {**example_table_metadata_v2, "table-uuid": different_uuid} + + rest_mock.post( + f"{TEST_URI}v1/namespaces/namespace/tables/table_name", + json={ + "metadata-location": metadata_location, + "metadata": metadata_with_different_uuid, + }, + status_code=200, + request_headers=TEST_HEADERS, + ) + + with pytest.raises(ValueError) as exc_info: + table.update_schema().add_column("new_col", StringType()).commit() + + assert "Table UUID does not match" in str(exc_info.value) + assert f"current={original_uuid}" in str(exc_info.value) + assert f"refreshed={different_uuid}" in str(exc_info.value) + + +def test_table_uuid_check_on_refresh(rest_mock: Mocker, example_table_metadata_v2: dict[str, Any]) -> None: + original_uuid = "9c12d441-03fe-4693-9a96-a0705ddf69c1" + different_uuid = "550e8400-e29b-41d4-a716-446655440000" + metadata_location = "s3://warehouse/database/table/metadata.json" + + rest_mock.get( + f"{TEST_URI}v1/namespaces/namespace/tables/table_name", + json={ + "metadata-location": metadata_location, + "metadata": example_table_metadata_v2, + "config": {}, + }, + status_code=200, + request_headers=TEST_HEADERS, + ) + + catalog = RestCatalog("rest", uri=TEST_URI, token=TEST_TOKEN) + table = catalog.load_table(("namespace", "table_name")) + + assert str(table.metadata.table_uuid) == original_uuid + + metadata_with_different_uuid = {**example_table_metadata_v2, "table-uuid": different_uuid} + + rest_mock.get( + f"{TEST_URI}v1/namespaces/namespace/tables/table_name", + json={ + "metadata-location": metadata_location, + "metadata": metadata_with_different_uuid, + "config": {}, + }, + status_code=200, + request_headers=TEST_HEADERS, + ) + + with pytest.raises(ValueError) as exc_info: + table.refresh() + + assert "Table UUID does not match" in str(exc_info.value) + assert f"current={original_uuid}" in str(exc_info.value) + assert f"refreshed={different_uuid}" in str(exc_info.value) diff --git a/tests/table/test_init.py b/tests/table/test_init.py index e40513fe86..ff5fbbf37c 100644 --- a/tests/table/test_init.py +++ b/tests/table/test_init.py @@ -1639,3 +1639,21 @@ def model_roundtrips(model: BaseModel) -> bool: if model != type(model).model_validate(model_data): pytest.fail(f"model {type(model)} did not roundtrip successfully") return True + + +def test_check_uuid_raises_when_mismatch(table_v2: Table, example_table_metadata_v2: dict[str, Any]) -> None: + different_uuid = "550e8400-e29b-41d4-a716-446655440000" + metadata_with_different_uuid = {**example_table_metadata_v2, "table-uuid": different_uuid} + new_metadata = TableMetadataV2(**metadata_with_different_uuid) + + with pytest.raises(ValueError) as exc_info: + Table._check_uuid(table_v2.metadata, new_metadata) + + assert "Table UUID does not match" in str(exc_info.value) + assert different_uuid in str(exc_info.value) + + +def test_check_uuid_passes_when_match(table_v2: Table, example_table_metadata_v2: dict[str, Any]) -> None: + new_metadata = TableMetadataV2(**example_table_metadata_v2) + # Should not raise with same uuid + Table._check_uuid(table_v2.metadata, new_metadata)