diff --git a/src/dags/libs/validation/validate_referential_integrity.py b/src/dags/libs/validation/validate_referential_integrity.py index ac7f9d471975b16a29772dff09448ffc9db69e60..6070b5d9d7133889d0e8fab9ccd775ff62680309 100644 --- a/src/dags/libs/validation/validate_referential_integrity.py +++ b/src/dags/libs/validation/validate_referential_integrity.py @@ -161,7 +161,10 @@ class ManifestIntegrity: found_ids = search_handler.search_records() for entity_id in external_references: - if entity_id.srn not in found_ids: + # found_ids holds both versioned ids and bare ids. When entity_id has no version + # (i.e. it refers to the latest version), it is looked up by its bare id instead. + entity_srn = entity_id.srn if entity_id.version else entity_id.id + if entity_srn not in found_ids: missing_ids.add(entity_id.srn) return missing_ids diff --git a/tests/plugin-unit-tests/test_manifest_integrity.py b/tests/plugin-unit-tests/test_manifest_integrity.py index cd45249ab16f2c33d18444393512b923bac948c5..d933da63d805d0e25b04d9acb0e38659dd7661cd 100644 --- a/tests/plugin-unit-tests/test_manifest_integrity.py +++ b/tests/plugin-unit-tests/test_manifest_integrity.py @@ -17,6 +17,7 @@ import copy import json import os import sys +from typing import List, Set sys.path.append(f"{os.getenv('AIRFLOW_SRC_DIR')}/plugins") sys.path.append(f"{os.getenv('AIRFLOW_SRC_DIR')}/dags") @@ -26,18 +27,31 @@ from mock_providers import get_test_credentials from file_paths import MANIFEST_WELL_PATH, REF_RESULT_WELL_PATH, MANIFEST_WELLLOG_PATH, \ REF_RESULT_WELLLOG_PATH, MANIFEST_GENERIC_PATH from libs.exceptions import ValidationIntegrityError -from libs.refresh_token import AirflowTokenRefresher +from libs.refresh_token import BaseTokenRefresher from libs.context import Context from libs.search_record_ids import ExtendedSearchId from libs.validation.validate_referential_integrity import ManifestIntegrity +from libs.utils import EntityId, split_id class TestIntegrityProvider: + @staticmethod + def 
mock_valid_extended_search(monkeypatch, entity_references: List[EntityId]): + search_response = set() + for entity in entity_references: + search_response.add(entity.id) + search_response.add(entity.srn) + monkeypatch.setattr( + ExtendedSearchId, + "search_records", + lambda *args, **kwargs: search_response + ) + @pytest.fixture def manifest_integrity(self) -> ManifestIntegrity: context = Context(app_key="", data_partition_id="test") - manifest_integrity = ManifestIntegrity("", AirflowTokenRefresher(get_test_credentials()), + manifest_integrity = ManifestIntegrity("", BaseTokenRefresher(get_test_credentials()), context) return manifest_integrity @@ -121,11 +135,7 @@ class TestIntegrityProvider: references = manifest_integrity._extract_references(work_product_component) manifest_records = manifest_integrity._extract_external_references( work_product_component, references) - monkeypatch.setattr( - ExtendedSearchId, - "search_records", - lambda *args, **kwargs: [e.srn for e in manifest_records] - ) + self.mock_valid_extended_search(monkeypatch, manifest_records) expected_wpc_list = copy.deepcopy(manifest["Data"]["WorkProductComponents"]) manifest_integrity.ensure_integrity(manifest) assert expected_wpc_list == manifest["Data"]["WorkProductComponents"] @@ -148,11 +158,7 @@ class TestIntegrityProvider: references = manifest_integrity._extract_references(work_product_component) manifest_records = manifest_integrity._extract_external_references( work_product_component, references) - monkeypatch.setattr( - ExtendedSearchId, - "search_records", - lambda *args, **kwargs: [mr.srn for mr in manifest_records] - ) + self.mock_valid_extended_search(monkeypatch, manifest_records) expected_wpc_list = copy.deepcopy(manifest["Data"]["WorkProductComponents"]) manifest_integrity.ensure_integrity(manifest) assert expected_wpc_list == manifest["Data"]["WorkProductComponents"] @@ -200,6 +206,71 @@ class TestIntegrityProvider: manifest["Data"]["WorkProductComponents"]) assert wrong_wpc not 
in valid_wpcs + @pytest.mark.parametrize( + "external_references,search_response,expected_missing_references", + [ + pytest.param( + [ + "osdu:reference-data--ResourceSecurityClassification:Public:", + "osdu:master-data--Organisation:HESS:", + ], + set(), + [ + "osdu:reference-data--ResourceSecurityClassification:Public:", + "osdu:master-data--Organisation:HESS:", + ], + id="Empty search return" + ), + pytest.param( + [ + "osdu:reference-data--ResourceSecurityClassification:Public:", + "osdu:master-data--Organisation:HESS:123", + ], + { + "osdu:reference-data--ResourceSecurityClassification:Public", + "osdu:reference-data--ResourceSecurityClassification:Public:123", + "osdu:master-data--Organisation:HESS", + "osdu:master-data--Organisation:HESS:123", + }, + set(), + id="Full search return" + ), + pytest.param( + [ + "osdu:reference-data--ResourceSecurityClassification:Public:", + "osdu:master-data--Organisation:HESS:", + ], + { + "osdu:reference-data--ResourceSecurityClassification:Public:111", + "osdu:reference-data--ResourceSecurityClassification:Public", + }, + [ + "osdu:master-data--Organisation:HESS:", + ], + id="Partial search return." 
+ ) + ] + ) + def test_find_missing_external_ids( + self, + monkeypatch, + manifest_integrity, + external_references: List[str], + search_response: set, + expected_missing_references: set + ): + entity_ids = [split_id(r) for r in external_references] + monkeypatch.setattr( + ExtendedSearchId, + "search_records", + lambda *args, **kwargs: search_response + ) + missing_ids = manifest_integrity._find_missing_external_ids(entity_ids) + assert not missing_ids.symmetric_difference(expected_missing_references), \ + f'External references {external_references}\n' \ + f'Search response {search_response}\n' \ + f'Expected missing ids {expected_missing_references}' + @pytest.mark.parametrize( "entity,search_return", [ @@ -245,7 +316,7 @@ class TestIntegrityProvider: }, { "osdu:master-data--Organisation:HESS:1", - "osdu:master-data--Organisation:HESS:", + "osdu:master-data--Organisation:HESS", }, id="Search last version"), @@ -256,7 +327,10 @@ class TestIntegrityProvider: "osdu:master-data--Organisation:HESS:1", ] }, - {"osdu:master-data--Organisation:HESS:1"}, + { + "osdu:master-data--Organisation:HESS:1", + "osdu:master-data--Organisation:HESS" + }, id="Search returns all ids"), pytest.param( { @@ -265,8 +339,10 @@ class TestIntegrityProvider: "osdu:master-data--Organisation:HESS:1:", ] }, - {"osdu:master-data--Organisation:HESS:1:1", - "osdu:master-data--Organisation:HESS:1:"}, + { + "osdu:master-data--Organisation:HESS:1:1", + "osdu:master-data--Organisation:HESS:1" + }, id="Integer part of id of reference is not considered as a version") ] )