diff --git a/src/dags/libs/validation/validate_data_integrity.py b/src/dags/libs/validation/validate_data_integrity.py index 26a05e7c7d8f12a06ce09037c4b97985ff726252..20d440169ad1f4dd6f4080e48ce0902ebb726edf 100644 --- a/src/dags/libs/validation/validate_data_integrity.py +++ b/src/dags/libs/validation/validate_data_integrity.py @@ -118,6 +118,8 @@ class DataIntegrityValidator: :return: Validated WorkProduct or empty if invalid. :rtype: Dict[str, Any] """ + if not work_product: + return {} all_wpcs_ids = self._collect_ids_from_object_array(work_product_components) expected_wpc_ids = set(work_product["data"]["Components"]) self._update_ids_from_search(expected_wpc_ids, all_wpcs_ids) @@ -138,14 +140,17 @@ class DataIntegrityValidator: manifest_data = manifest.get("Data") if not manifest_data: return - - valid_wpcs, valid_datasets = self._validate_wpcs_to_datasets( - manifest_data["WorkProductComponents"], manifest_data["Datasets"]) - valid_wp = self._validate_wp_to_wpcs(manifest_data["WorkProduct"], valid_wpcs) - - if valid_wp: - manifest_data["WorkProduct"] = valid_wp - manifest_data["WorkProductComponents"] = valid_wpcs - manifest_data["Datasets"] = valid_datasets + if manifest_data.get("WorkProductComponents") and manifest_data.get("Datasets"): + valid_wpcs, valid_datasets = self._validate_wpcs_to_datasets( + manifest_data["WorkProductComponents"], manifest_data["Datasets"]) + # "WorkProduct" may be missing: use .get() so validation sees None rather than raising KeyError. + valid_wp = self._validate_wp_to_wpcs(manifest_data.get("WorkProduct"), valid_wpcs) + + if valid_wp: + manifest_data["WorkProduct"] = valid_wp + manifest_data["WorkProductComponents"] = valid_wpcs + manifest_data["Datasets"] = valid_datasets + else: + manifest["Data"] = {} else: manifest["Data"] = {} diff --git a/tests/plugin-unit-tests/data/data_integrity/empty_data_inside.json b/tests/plugin-unit-tests/data/data_integrity/empty_data_inside.json new file mode 100644 index 0000000000000000000000000000000000000000..aa3c17bcbaf2b2aac453123234eea4d46cf259b5 --- /dev/null +++ 
b/tests/plugin-unit-tests/data/data_integrity/empty_data_inside.json @@ -0,0 +1,9 @@ +{ + "kind": "osdu:wks:Manifest:1.0.0", + "ReferenceData": [], + "MasterData": [], + "Data": { + "WorkProductComponents": [], + "Datasets": [] + } +} diff --git a/tests/plugin-unit-tests/data/data_integrity/empty_wp.json b/tests/plugin-unit-tests/data/data_integrity/empty_wp.json new file mode 100644 index 0000000000000000000000000000000000000000..4e62d45bd2c1706a7c3928673d662ab14b6eb331 --- /dev/null +++ b/tests/plugin-unit-tests/data/data_integrity/empty_wp.json @@ -0,0 +1,73 @@ +{ + "kind": "osdu:wks:Manifest:1.0.0", + "ReferenceData": [], + "MasterData": [], + "Data": { + "WorkProduct": {}, + "WorkProductComponents": [ + { + "id": "surrogate-key:wpc-1", + "kind": "osdu:wks:work-product-component--Document:1.0.0", + "acl": { + "owners": [], + "viewers": [] + }, + "legal": { + "legaltags": [], + "otherRelevantDataCountries": [] + }, + "data": { + "ResourceSecurityClassification": "osdu:reference-data--ResourceSecurityClassification:RESTRICTED:", + "Name": "69_D_CH_11", + "Description": "Document", + "Datasets": [ + "surrogate-key:file-1" + ] + } + }, + { + "id": "surrogate-key:wpc-2", + "kind": "osdu:wks:work-product-component--Document:1.0.0", + "acl": { + "owners": [], + "viewers": [] + }, + "legal": { + "legaltags": [], + "otherRelevantDataCountries": [] + }, + "data": { + "ResourceSecurityClassification": "osdu:reference-data--ResourceSecurityClassification:RESTRICTED:", + "Name": "69_D_CH_11", + "Description": "Document", + "Datasets": [ + "surrogate-key:file-2" + ] + } + } + ], + "Datasets": [ + { + "id": "surrogate-key:file-1", + "kind": "osdu:wks:dataset--File.Generic:1.0.0", + "acl": { + "owners": [], + "viewers": [] + }, + "legal": { + "legaltags": [], + "otherRelevantDataCountries": [] + }, + "data": { + "ResourceSecurityClassification": "osdu:reference-data--ResourceSecurityClassification:RESTRICTED:", + "DatasetProperties": { + "FileSourceInfo": { + "FileSource": "", 
+ "PreloadFilePath": "s3://osdu-seismic-test-data/r1/data/provided/USGS_docs/69_D_CH_11.pdf" + } + } + } + } + ] + } +} diff --git a/tests/plugin-unit-tests/file_paths.py b/tests/plugin-unit-tests/file_paths.py index 243d036720e817ab5cf05bfe331b33866a6f3226..2dfb76ead661e225bf27f30082fb3578c70b83fe 100644 --- a/tests/plugin-unit-tests/file_paths.py +++ b/tests/plugin-unit-tests/file_paths.py @@ -63,4 +63,6 @@ DATA_INTEGRITY_ORPHAN_DATASETS = f"{DATA_PATH_PREFIX}/data_integrity/orphan_data DATA_INTEGRITY_VALID_WP_INVALID_WPC = f"{DATA_PATH_PREFIX}/data_integrity/valid_wp_invalid_wpc.json" DATA_INTEGRITY_INVALID_WP = f"{DATA_PATH_PREFIX}/data_integrity/invalid_wp.json" DATA_INTEGRITY_EMPTY_DATA = f"{DATA_PATH_PREFIX}/data_integrity/empty_data.json" +DATA_INTEGRITY_EMPTY_DATA_CASE_2 = f"{DATA_PATH_PREFIX}/data_integrity/empty_data_inside.json" +DATA_INTEGRITY_EMPTY_WP = f"{DATA_PATH_PREFIX}/data_integrity/empty_wp.json" DATA_INTEGRITY_VALID_REAL_IDS = f"{DATA_PATH_PREFIX}/data_integrity/valid_data_real_ids.json" diff --git a/tests/plugin-unit-tests/test_data_integrity_validator.py b/tests/plugin-unit-tests/test_data_integrity_validator.py index d202cb582a1a031f8382c669b3a3fd33cd966569..fb6fe216c5719d1333c46cd211933c12463061fa 100644 --- a/tests/plugin-unit-tests/test_data_integrity_validator.py +++ b/tests/plugin-unit-tests/test_data_integrity_validator.py @@ -25,7 +25,8 @@ import pytest_mock from file_paths import (DATA_INTEGRITY_VALID_DATA, DATA_INTEGRITY_ORPHAN_DATASETS, DATA_INTEGRITY_VALID_WP_INVALID_WPC, DATA_INTEGRITY_INVALID_WP, - DATA_INTEGRITY_EMPTY_DATA, DATA_INTEGRITY_VALID_REAL_IDS) + DATA_INTEGRITY_EMPTY_DATA, DATA_INTEGRITY_VALID_REAL_IDS, + DATA_INTEGRITY_EMPTY_DATA_CASE_2, DATA_INTEGRITY_EMPTY_WP) from libs.search_client import SearchClient, SearchResponse from libs.validation.validate_data_integrity import DataIntegrityValidator @@ -59,6 +60,23 @@ class TestDataIntegrityValidator: assert expected_manifest == input_manifest + 
@pytest.mark.parametrize("expected_manifest_path, input_manifest_path", [ + pytest.param(DATA_INTEGRITY_EMPTY_DATA, DATA_INTEGRITY_EMPTY_DATA), + pytest.param(DATA_INTEGRITY_EMPTY_DATA, DATA_INTEGRITY_EMPTY_DATA_CASE_2), + pytest.param(DATA_INTEGRITY_EMPTY_DATA, DATA_INTEGRITY_EMPTY_WP) + ]) + def test_validate_empty_data_integrity(self, mocker: pytest_mock.MockerFixture, provide_manifests, + expected_manifest_path: str, input_manifest_path: str): + """Test that each parametrized input manifest equals the DATA_INTEGRITY_EMPTY_DATA manifest after validation.""" + search_client = mocker.Mock(spec=SearchClient) + data_integrity_validator = DataIntegrityValidator(search_client) + + expected_manifest, input_manifest = provide_manifests + + data_integrity_validator.validate_manifest_data_integrity(input_manifest) + + assert expected_manifest == input_manifest + @pytest.mark.parametrize("expected_manifest_path, input_manifest_path, wpc_ids, datasets_ids", [ pytest.param(DATA_INTEGRITY_VALID_REAL_IDS, DATA_INTEGRITY_VALID_REAL_IDS, ["opendes:work-product-component--GenericWorkProductComponent:1234"],