From 79ef5ed0c5375ca504c3bce4d4a6070fb1d1584d Mon Sep 17 00:00:00 2001
From: Dmitrii <dmitrii_valuiskii@epam.com>
Date: Fri, 19 Feb 2021 13:34:28 +0300
Subject: [PATCH] GONRG-1881: add check for WP manifest completeness

---
 .../validation/validate_data_integrity.py     | 22 +++---
 .../data_integrity/empty_data_inside.json     |  9 +++
 .../data/data_integrity/empty_wp.json         | 73 +++++++++++++++++++
 tests/plugin-unit-tests/file_paths.py         |  2 +
 .../test_data_integrity_validator.py          | 20 ++++-
 5 files changed, 116 insertions(+), 10 deletions(-)
 create mode 100644 tests/plugin-unit-tests/data/data_integrity/empty_data_inside.json
 create mode 100644 tests/plugin-unit-tests/data/data_integrity/empty_wp.json

diff --git a/src/dags/libs/validation/validate_data_integrity.py b/src/dags/libs/validation/validate_data_integrity.py
index 26a05e7..20d4401 100644
--- a/src/dags/libs/validation/validate_data_integrity.py
+++ b/src/dags/libs/validation/validate_data_integrity.py
@@ -118,6 +118,8 @@ class DataIntegrityValidator:
         :return: Validated WorkProduct or empty if invalid.
         :rtype: Dict[str, Any]
         """
+        if not work_product:
+            return {}
         all_wpcs_ids = self._collect_ids_from_object_array(work_product_components)
         expected_wpc_ids = set(work_product["data"]["Components"])
         self._update_ids_from_search(expected_wpc_ids, all_wpcs_ids)
@@ -138,14 +140,16 @@ class DataIntegrityValidator:
         manifest_data = manifest.get("Data")
         if not manifest_data:
             return
-
-        valid_wpcs, valid_datasets = self._validate_wpcs_to_datasets(
-            manifest_data["WorkProductComponents"], manifest_data["Datasets"])
-        valid_wp = self._validate_wp_to_wpcs(manifest_data["WorkProduct"], valid_wpcs)
-
-        if valid_wp:
-            manifest_data["WorkProduct"] = valid_wp
-            manifest_data["WorkProductComponents"] = valid_wpcs
-            manifest_data["Datasets"] = valid_datasets
+        if manifest_data.get("WorkProductComponents") and manifest_data.get("Datasets"):
+            valid_wpcs, valid_datasets = self._validate_wpcs_to_datasets(
+                manifest_data["WorkProductComponents"], manifest_data["Datasets"])
+            valid_wp = self._validate_wp_to_wpcs(manifest_data["WorkProduct"], valid_wpcs)
+
+            if valid_wp:
+                manifest_data["WorkProduct"] = valid_wp
+                manifest_data["WorkProductComponents"] = valid_wpcs
+                manifest_data["Datasets"] = valid_datasets
+            else:
+                manifest["Data"] = {}
         else:
             manifest["Data"] = {}
diff --git a/tests/plugin-unit-tests/data/data_integrity/empty_data_inside.json b/tests/plugin-unit-tests/data/data_integrity/empty_data_inside.json
new file mode 100644
index 0000000..aa3c17b
--- /dev/null
+++ b/tests/plugin-unit-tests/data/data_integrity/empty_data_inside.json
@@ -0,0 +1,9 @@
+{
+    "kind": "osdu:wks:Manifest:1.0.0",
+    "ReferenceData": [],
+    "MasterData": [],
+    "Data": {
+        "WorkProductComponents": [],
+        "Datasets": []
+    }
+}
diff --git a/tests/plugin-unit-tests/data/data_integrity/empty_wp.json b/tests/plugin-unit-tests/data/data_integrity/empty_wp.json
new file mode 100644
index 0000000..4e62d45
--- /dev/null
+++ b/tests/plugin-unit-tests/data/data_integrity/empty_wp.json
@@ -0,0 +1,73 @@
+{
+    "kind": "osdu:wks:Manifest:1.0.0",
+    "ReferenceData": [],
+    "MasterData": [],
+    "Data": {
+        "WorkProduct": {},
+        "WorkProductComponents": [
+            {
+                "id": "surrogate-key:wpc-1",
+                "kind": "osdu:wks:work-product-component--Document:1.0.0",
+                "acl": {
+                    "owners": [],
+                    "viewers": []
+                },
+                "legal": {
+                    "legaltags": [],
+                    "otherRelevantDataCountries": []
+                },
+                "data": {
+                    "ResourceSecurityClassification": "osdu:reference-data--ResourceSecurityClassification:RESTRICTED:",
+                    "Name": "69_D_CH_11",
+                    "Description": "Document",
+                    "Datasets": [
+                        "surrogate-key:file-1"
+                    ]
+                }
+            },
+            {
+                "id": "surrogate-key:wpc-2",
+                "kind": "osdu:wks:work-product-component--Document:1.0.0",
+                "acl": {
+                    "owners": [],
+                    "viewers": []
+                },
+                "legal": {
+                    "legaltags": [],
+                    "otherRelevantDataCountries": []
+                },
+                "data": {
+                    "ResourceSecurityClassification": "osdu:reference-data--ResourceSecurityClassification:RESTRICTED:",
+                    "Name": "69_D_CH_11",
+                    "Description": "Document",
+                    "Datasets": [
+                        "surrogate-key:file-2"
+                    ]
+                }
+            }
+        ],
+        "Datasets": [
+            {
+                "id": "surrogate-key:file-1",
+                "kind": "osdu:wks:dataset--File.Generic:1.0.0",
+                "acl": {
+                    "owners": [],
+                    "viewers": []
+                },
+                "legal": {
+                    "legaltags": [],
+                    "otherRelevantDataCountries": []
+                },
+                "data": {
+                    "ResourceSecurityClassification": "osdu:reference-data--ResourceSecurityClassification:RESTRICTED:",
+                    "DatasetProperties": {
+                        "FileSourceInfo": {
+                            "FileSource": "",
+                            "PreloadFilePath": "s3://osdu-seismic-test-data/r1/data/provided/USGS_docs/69_D_CH_11.pdf"
+                        }
+                    }
+                }
+            }
+        ]
+    }
+}
diff --git a/tests/plugin-unit-tests/file_paths.py b/tests/plugin-unit-tests/file_paths.py
index 243d036..2dfb76e 100644
--- a/tests/plugin-unit-tests/file_paths.py
+++ b/tests/plugin-unit-tests/file_paths.py
@@ -63,4 +63,6 @@ DATA_INTEGRITY_ORPHAN_DATASETS = f"{DATA_PATH_PREFIX}/data_integrity/orphan_data
 DATA_INTEGRITY_VALID_WP_INVALID_WPC = f"{DATA_PATH_PREFIX}/data_integrity/valid_wp_invalid_wpc.json"
 DATA_INTEGRITY_INVALID_WP = f"{DATA_PATH_PREFIX}/data_integrity/invalid_wp.json"
 DATA_INTEGRITY_EMPTY_DATA = f"{DATA_PATH_PREFIX}/data_integrity/empty_data.json"
+DATA_INTEGRITY_EMPTY_DATA_CASE_2 = f"{DATA_PATH_PREFIX}/data_integrity/empty_data_inside.json"
+DATA_INTEGRITY_EMPTY_WP = f"{DATA_PATH_PREFIX}/data_integrity/empty_wp.json"
 DATA_INTEGRITY_VALID_REAL_IDS = f"{DATA_PATH_PREFIX}/data_integrity/valid_data_real_ids.json"
diff --git a/tests/plugin-unit-tests/test_data_integrity_validator.py b/tests/plugin-unit-tests/test_data_integrity_validator.py
index d202cb5..fb6fe21 100644
--- a/tests/plugin-unit-tests/test_data_integrity_validator.py
+++ b/tests/plugin-unit-tests/test_data_integrity_validator.py
@@ -25,7 +25,8 @@ import pytest_mock
 
 from file_paths import (DATA_INTEGRITY_VALID_DATA, DATA_INTEGRITY_ORPHAN_DATASETS,
                         DATA_INTEGRITY_VALID_WP_INVALID_WPC, DATA_INTEGRITY_INVALID_WP,
-                        DATA_INTEGRITY_EMPTY_DATA, DATA_INTEGRITY_VALID_REAL_IDS)
+                        DATA_INTEGRITY_EMPTY_DATA, DATA_INTEGRITY_VALID_REAL_IDS,
+                        DATA_INTEGRITY_EMPTY_DATA_CASE_2, DATA_INTEGRITY_EMPTY_WP)
 from libs.search_client import SearchClient, SearchResponse
 from libs.validation.validate_data_integrity import DataIntegrityValidator
 
@@ -59,6 +60,23 @@ class TestDataIntegrityValidator:
 
         assert expected_manifest == input_manifest
 
+    @pytest.mark.parametrize("expected_manifest_path, input_manifest_path", [
+        pytest.param(DATA_INTEGRITY_EMPTY_DATA, DATA_INTEGRITY_EMPTY_DATA),
+        pytest.param(DATA_INTEGRITY_EMPTY_DATA, DATA_INTEGRITY_EMPTY_DATA_CASE_2),
+        pytest.param(DATA_INTEGRITY_EMPTY_DATA, DATA_INTEGRITY_EMPTY_WP)
+    ])
+    def test_validate_empty_data_integrity(self, mocker: pytest_mock.MockerFixture, provide_manifests,
+                                     expected_manifest_path: str, input_manifest_path: str):
+        """Test that empty or incomplete Data yields the empty-data manifest."""
+        search_client = mocker.Mock(spec=SearchClient)
+        data_integrity_validator = DataIntegrityValidator(search_client)
+
+        expected_manifest, input_manifest = provide_manifests
+
+        data_integrity_validator.validate_manifest_data_integrity(input_manifest)
+
+        assert expected_manifest == input_manifest
+
     @pytest.mark.parametrize("expected_manifest_path, input_manifest_path, wpc_ids, datasets_ids", [
         pytest.param(DATA_INTEGRITY_VALID_REAL_IDS, DATA_INTEGRITY_VALID_REAL_IDS,
                      ["opendes:work-product-component--GenericWorkProductComponent:1234"],
-- 
GitLab