From 79ef5ed0c5375ca504c3bce4d4a6070fb1d1584d Mon Sep 17 00:00:00 2001 From: Dmitrii <dmitrii_valuiskii@epam.com> Date: Fri, 19 Feb 2021 13:34:28 +0300 Subject: [PATCH] GONRG-1881: add check for WP manifest completeness --- .../validation/validate_data_integrity.py | 22 +++--- .../data_integrity/empty_data_inside.json | 9 +++ .../data/data_integrity/empty_wp.json | 73 +++++++++++++++++++ tests/plugin-unit-tests/file_paths.py | 2 + .../test_data_integrity_validator.py | 20 ++++- 5 files changed, 116 insertions(+), 10 deletions(-) create mode 100644 tests/plugin-unit-tests/data/data_integrity/empty_data_inside.json create mode 100644 tests/plugin-unit-tests/data/data_integrity/empty_wp.json diff --git a/src/dags/libs/validation/validate_data_integrity.py b/src/dags/libs/validation/validate_data_integrity.py index 26a05e7..20d4401 100644 --- a/src/dags/libs/validation/validate_data_integrity.py +++ b/src/dags/libs/validation/validate_data_integrity.py @@ -118,6 +118,8 @@ class DataIntegrityValidator: :return: Validated WorkProduct or empty if invalid. 
:rtype: Dict[str, Any] """ + if not work_product: + return {} all_wpcs_ids = self._collect_ids_from_object_array(work_product_components) expected_wpc_ids = set(work_product["data"]["Components"]) self._update_ids_from_search(expected_wpc_ids, all_wpcs_ids) @@ -138,14 +140,16 @@ class DataIntegrityValidator: manifest_data = manifest.get("Data") if not manifest_data: return - - valid_wpcs, valid_datasets = self._validate_wpcs_to_datasets( - manifest_data["WorkProductComponents"], manifest_data["Datasets"]) - valid_wp = self._validate_wp_to_wpcs(manifest_data["WorkProduct"], valid_wpcs) - - if valid_wp: - manifest_data["WorkProduct"] = valid_wp - manifest_data["WorkProductComponents"] = valid_wpcs - manifest_data["Datasets"] = valid_datasets + if manifest_data.get("WorkProductComponents") and manifest_data.get("Datasets"): + valid_wpcs, valid_datasets = self._validate_wpcs_to_datasets( + manifest_data["WorkProductComponents"], manifest_data["Datasets"]) + valid_wp = self._validate_wp_to_wpcs(manifest_data["WorkProduct"], valid_wpcs) + + if valid_wp: + manifest_data["WorkProduct"] = valid_wp + manifest_data["WorkProductComponents"] = valid_wpcs + manifest_data["Datasets"] = valid_datasets + else: + manifest["Data"] = {} else: manifest["Data"] = {} diff --git a/tests/plugin-unit-tests/data/data_integrity/empty_data_inside.json b/tests/plugin-unit-tests/data/data_integrity/empty_data_inside.json new file mode 100644 index 0000000..aa3c17b --- /dev/null +++ b/tests/plugin-unit-tests/data/data_integrity/empty_data_inside.json @@ -0,0 +1,9 @@ +{ + "kind": "osdu:wks:Manifest:1.0.0", + "ReferenceData": [], + "MasterData": [], + "Data": { + "WorkProductComponents": [], + "Datasets": [] + } +} diff --git a/tests/plugin-unit-tests/data/data_integrity/empty_wp.json b/tests/plugin-unit-tests/data/data_integrity/empty_wp.json new file mode 100644 index 0000000..4e62d45 --- /dev/null +++ b/tests/plugin-unit-tests/data/data_integrity/empty_wp.json @@ -0,0 +1,73 @@ +{ + "kind": 
"osdu:wks:Manifest:1.0.0", + "ReferenceData": [], + "MasterData": [], + "Data": { + "WorkProduct": {}, + "WorkProductComponents": [ + { + "id": "surrogate-key:wpc-1", + "kind": "osdu:wks:work-product-component--Document:1.0.0", + "acl": { + "owners": [], + "viewers": [] + }, + "legal": { + "legaltags": [], + "otherRelevantDataCountries": [] + }, + "data": { + "ResourceSecurityClassification": "osdu:reference-data--ResourceSecurityClassification:RESTRICTED:", + "Name": "69_D_CH_11", + "Description": "Document", + "Datasets": [ + "surrogate-key:file-1" + ] + } + }, + { + "id": "surrogate-key:wpc-2", + "kind": "osdu:wks:work-product-component--Document:1.0.0", + "acl": { + "owners": [], + "viewers": [] + }, + "legal": { + "legaltags": [], + "otherRelevantDataCountries": [] + }, + "data": { + "ResourceSecurityClassification": "osdu:reference-data--ResourceSecurityClassification:RESTRICTED:", + "Name": "69_D_CH_11", + "Description": "Document", + "Datasets": [ + "surrogate-key:file-2" + ] + } + } + ], + "Datasets": [ + { + "id": "surrogate-key:file-1", + "kind": "osdu:wks:dataset--File.Generic:1.0.0", + "acl": { + "owners": [], + "viewers": [] + }, + "legal": { + "legaltags": [], + "otherRelevantDataCountries": [] + }, + "data": { + "ResourceSecurityClassification": "osdu:reference-data--ResourceSecurityClassification:RESTRICTED:", + "DatasetProperties": { + "FileSourceInfo": { + "FileSource": "", + "PreloadFilePath": "s3://osdu-seismic-test-data/r1/data/provided/USGS_docs/69_D_CH_11.pdf" + } + } + } + } + ] + } +} diff --git a/tests/plugin-unit-tests/file_paths.py b/tests/plugin-unit-tests/file_paths.py index 243d036..2dfb76e 100644 --- a/tests/plugin-unit-tests/file_paths.py +++ b/tests/plugin-unit-tests/file_paths.py @@ -63,4 +63,6 @@ DATA_INTEGRITY_ORPHAN_DATASETS = f"{DATA_PATH_PREFIX}/data_integrity/orphan_data DATA_INTEGRITY_VALID_WP_INVALID_WPC = f"{DATA_PATH_PREFIX}/data_integrity/valid_wp_invalid_wpc.json" DATA_INTEGRITY_INVALID_WP = 
f"{DATA_PATH_PREFIX}/data_integrity/invalid_wp.json" DATA_INTEGRITY_EMPTY_DATA = f"{DATA_PATH_PREFIX}/data_integrity/empty_data.json" +DATA_INTEGRITY_EMPTY_DATA_CASE_2 = f"{DATA_PATH_PREFIX}/data_integrity/empty_data_inside.json" +DATA_INTEGRITY_EMPTY_WP = f"{DATA_PATH_PREFIX}/data_integrity/empty_wp.json" DATA_INTEGRITY_VALID_REAL_IDS = f"{DATA_PATH_PREFIX}/data_integrity/valid_data_real_ids.json" diff --git a/tests/plugin-unit-tests/test_data_integrity_validator.py b/tests/plugin-unit-tests/test_data_integrity_validator.py index d202cb5..fb6fe21 100644 --- a/tests/plugin-unit-tests/test_data_integrity_validator.py +++ b/tests/plugin-unit-tests/test_data_integrity_validator.py @@ -25,7 +25,8 @@ import pytest_mock from file_paths import (DATA_INTEGRITY_VALID_DATA, DATA_INTEGRITY_ORPHAN_DATASETS, DATA_INTEGRITY_VALID_WP_INVALID_WPC, DATA_INTEGRITY_INVALID_WP, - DATA_INTEGRITY_EMPTY_DATA, DATA_INTEGRITY_VALID_REAL_IDS) + DATA_INTEGRITY_EMPTY_DATA, DATA_INTEGRITY_VALID_REAL_IDS, + DATA_INTEGRITY_EMPTY_DATA_CASE_2, DATA_INTEGRITY_EMPTY_WP) from libs.search_client import SearchClient, SearchResponse from libs.validation.validate_data_integrity import DataIntegrityValidator @@ -59,6 +60,23 @@ class TestDataIntegrityValidator: assert expected_manifest == input_manifest + @pytest.mark.parametrize("expected_manifest_path, input_manifest_path", [ + pytest.param(DATA_INTEGRITY_EMPTY_DATA, DATA_INTEGRITY_EMPTY_DATA), + pytest.param(DATA_INTEGRITY_EMPTY_DATA, DATA_INTEGRITY_EMPTY_DATA_CASE_2), + pytest.param(DATA_INTEGRITY_EMPTY_DATA, DATA_INTEGRITY_EMPTY_WP) + ]) + def test_validate_empty_data_integrity(self, mocker: pytest_mock.MockerFixture, provide_manifests, + expected_manifest_path: str, input_manifest_path: str): + """Test that manifests with empty or incomplete Data equal the empty-data manifest after validation.""" + search_client = mocker.Mock(spec=SearchClient) + data_integrity_validator = DataIntegrityValidator(search_client) + + expected_manifest, input_manifest = provide_manifests + +
data_integrity_validator.validate_manifest_data_integrity(input_manifest) + + assert expected_manifest == input_manifest + @pytest.mark.parametrize("expected_manifest_path, input_manifest_path, wpc_ids, datasets_ids", [ pytest.param(DATA_INTEGRITY_VALID_REAL_IDS, DATA_INTEGRITY_VALID_REAL_IDS, ["opendes:work-product-component--GenericWorkProductComponent:1234"], -- GitLab