Skip to content
Snippets Groups Projects
Commit fab381c4 authored by Siarhei Khaletski (EPAM)'s avatar Siarhei Khaletski (EPAM) :triangular_flag_on_post:
Browse files

Merge branch 'GONRG-3109_move_common_logic' into 'master'

GONRG-3109: Move common logic to osdu-airflow-lib

See merge request !76
parents d217212e 23dcb378
No related branches found
No related tags found
1 merge request!76GONRG-3109: Move common logic to osdu-airflow-lib
Pipeline #65237 passed
Showing with 1 addition and 2610 deletions
{
"x-osdu-license": "Copyright 2021, The Open Group \\nLicensed under the Apache License, Version 2.0 (the \"License\"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 . Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.",
"$id": "https://schema.osdu.opengroup.org/json/manifest/Manifest.1.0.0.json",
"$schema": "http://json-schema.org/draft-07/schema#",
"title": "Load Manifest Schema",
"description": "Load manifest applicable for all types defined as 'kind', i.e. registered as schemas with the Schema Service. It supports loading of individual 'records' of any group-type or combinations. The load sequence follows a well-defined sequence. The 'ReferenceData' array is processed first (if populated). The 'MasterData' array is processed second (if populated) second. The 'Data' structure is processed last (if populated). Inside the 'Data' property the 'Files' array is processed first, followed by the 'WorkProductComponents' array, the 'WorkProduct' is processed last. Any arrays are ordered. should there be interdependencies, the dependent items must be placed behind their relationship targets, e.g. a master-data Well record must placed in the 'MasterData' array before its Wellbores.",
"type": "object",
"properties": {
"kind": {
"description": "The schema identification for the manifest record following the pattern {Namespace}:{Source}:{Type}:{VersionMajor}.{VersionMinor}.{VersionPatch}. The versioning scheme follows the semantic versioning, https://semver.org/.",
"title": "Manifest Kind",
"type": "string",
"pattern": "^[\\w\\-\\.]+:[\\w\\-\\.]+:[\\w\\-\\.\\/]+:[0-9]+.[0-9]+.[0-9]+$",
"example": "osdu:wks:Manifest:1.0.0"
},
"ReferenceData": {
"description": "Reference-data are submitted as an array of records.",
"type": "array",
"items": {
"$ref": "GenericReferenceData.1.0.0.json"
}
},
"MasterData": {
"description": "Master-data are submitted as an array of records.",
"type": "array",
"items": {
"$ref": "GenericMasterData.1.0.0.json"
}
},
"Data": {
"description": "Manifest schema for work-product, work-product-component, dataset ensembles. The items in 'Datasets' are processed first since they are referenced by 'WorkProductComponents' ('data.DatasetIDs[]' and 'data.Artefacts[].ResourceID'). The WorkProduct is processed last collecting the WorkProductComponents.",
"type": "object",
"properties": {
"WorkProduct": {
"description": "The work-product component capturing the work-product-component records belonging to this loading/ingestion transaction.",
"$ref": "GenericWorkProduct.1.0.0.json"
},
"WorkProductComponents": {
"description": "The list of work-product-components records. The record ids are internal surrogate keys enabling the association of work-product-component records with the work-product records.",
"type": "array",
"items": {
"$ref": "GenericWorkProductComponent.1.0.0.json"
}
},
"Datasets": {
"description": "The list of 'Datasets' or data containers holding the actual data. The record ids are usually internal surrogate keys enabling the association of dataset records with work-product-component records, namely via 'DatasetIDs' and 'Artefacts.ResourceID' (both referring to 'dataset' group-type entity types).",
"type": "array",
"items": {
"$ref": "GenericDataset.1.0.0.json"
}
}
}
}
}
}
{
"execution_context": {
"Payload": {
"authorization": "Bearer test",
"data-partition-id": "opendes",
"AppKey": "",
"kind_version": "3.0.0"
},
"$schema": "https://schema.osdu.opengroup.org/json/master-data/Wellbore.1.0.0.json",
"$filename": "load_Wellbore.1.0.0_350112350400.json",
"manifest": {
"kind": "test:test:Manifest:1.0.0",
"ReferenceData": [],
"MasterData": [
{
"id": "opendes:master-data/Wellbore:350112350400",
"kind": "opendes:osdu:TestMaster:0.3.0",
"groupType": "master-data",
"version": 1,
"acl": {
"owners": [
"data.default.viewers@opendes.osdu-gcp.go3-nrg.projects.epam.com"
],
"viewers": [
"data.default.owners@opendes.osdu-gcp.go3-nrg.projects.epam.com"
]
},
"legal": {
"legaltags": [
"opendes-demo-legaltag"
],
"otherRelevantDataCountries": [
"srn:opendes:master-data/GeoPoliticalEntity:USA:"
],
"status": "srn:opendes:reference-data/LegalStatus:public:1111"
},
"resourceHostRegionIDs": [
"srn:opendes:reference-data/OSDURegion:US-EAST:"
],
"resourceObjectCreationDateTime": "2020-10-16T11:14:45-05:00",
"resourceVersionCreationDateTime": "2020-10-16T11:14:45-05:00",
"resourceSecurityClassification": "srn:opendes:reference-data/ResourceSecurityClassification:public:",
"source": "srn:opendes:master-data/Organisation:Oklahoma Corporation Commission:",
"existenceKind": "srn:opendes:reference-data/ExistenceKind:Active:",
"licenseState": "srn:opendes:reference-data/LicenseState:Unlicensed:",
"data": {
"SequenceNumber": 1
},
"schema": "test:test:GenericMasterData:1.0.0"
}
],
"Data": {}
}
},
"workflow_name": "osdu_ingest",
"run_id": "foo"
}
This diff is collapsed.
{
"results": [
{
"id": "some_test_id",
"version": 12345
}
],
"aggregations": null,
"totalCount": 45
}
# Copyright 2020 Google LLC
# Copyright 2020 EPAM Systems
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ACL fixture: one viewers group and one owners group.
ACL_DICT = {
    'viewers': ['data.default.viewers@odes.osdu.test.net'],
    'owners': ['data.default.owners@odes.osdu.test.net'],
}

# Legal fixture: legal tag, relevant countries and compliance status.
LEGAL_DICT = {
    'legaltags': ['odes-demo-legaltag'],
    'otherRelevantDataCountries': ['FR', 'US', 'CA'],
    'status': 'compliant',
}
# Sample run configuration fixture: one work-product, one
# work-product-component, one file, and the request payload.
CONF = {
    "WorkProduct": {
        "ResourceTypeID": "srn:type:work-product/WellLog:",
        "ResourceSecurityClassification": "srn:reference-data/ResourceSecurityClassification:RESTRICTED:",
        "Data": {
            "GroupTypeProperties": {"Components": []},
            "IndividualTypeProperties": {
                "Name": "Test AKM LOG 111",
                "Description": "Well Log",
            },
            "ExtensionProperties": {},
        },
        # Links the work-product to the component declared below.
        "ComponentsAssociativeIDs": ["wpc-1"],
    },
    "WorkProductComponents": [
        {
            "ResourceTypeID": "srn:type:work-product-component/WellLog:",
            "ResourceSecurityClassification": "srn:reference-data/ResourceSecurityClassification:RESTRICTED:",
            "Data": {
                "GroupTypeProperties": {"Files": [], "Artefacts": []},
                "AssociativeID": "wpc-1",
                # Links the component to the file declared below.
                "FileAssociativeIDs": ["f-1"],
            },
        },
    ],
    "Payload": {
        "authorization": "Bearer test",
        "data-partition-id": "test",
        "AppKey": "test",
        "kind_version": "3.0.0",
        "acl": {
            "viewers": ["data.default.viewers@odes.osdu.joonix.net"],
            "owners": ["data.default.owners@odes.osdu.joonix.net"],
        },
        "legal": {
            "legaltags": ["odes-demo-legaltag"],
            "otherRelevantDataCountries": ["FR", "US", "CA"],
        },
    },
    "Files": [
        {
            "ResourceTypeID": "srn:type:file/las2:",
            "ResourceSecurityClassification": "srn:reference-data/ResourceSecurityClassification:RESTRICTED:",
            "Data": {
                "GroupTypeProperties": {
                    "FileSource": "",
                    "PreLoadFilePath": "foo",
                },
                "IndividualTypeProperties": {},
                "ExtensionProperties": {},
            },
            "AssociativeID": "f-1",
        },
    ],
    "WorkflowID": "foo",
}
# Two-element fixture: a list of (record, type-name) pairs followed by a
# list of file SRN strings.
PROCESS_FILE_ITEMS_RESULT = (
    [
        (
            {
                'kind': 'test:osdu:file:3.0.0',
                'legal': {
                    'legaltags': ['odes-demo-legaltag'],
                    'otherRelevantDataCountries': ['US'],
                    'status': 'compliant',
                },
                'acl': {
                    'viewers': ['data.default.viewers@odes.osdu.test.net'],
                    'owners': ['data.default.owners@odes.osdu.test.net'],
                },
                'data': {
                    'ResourceTypeID': 'srn:type:file/las2:',
                    'ResourceSecurityClassification': 'srn:reference-data/ResourceSecurityClassification:RESTRICTED:',
                    'Data': {
                        'GroupTypeProperties': {
                            'FileSource': '',
                            'PreLoadFilePath': 'foo',
                        },
                        'IndividualTypeProperties': {},
                        'ExtensionProperties': {},
                    },
                    'AssociativeID': 'f-1',
                    'ResourceID': "",
                },
            },
            'File',
        ),
    ],
    ['srn:file/las2:434064998475386:'],
)
# Bundles the ACL, legal and run-configuration fixtures under the keys
# "acl", "legal_tag" and "data_object".
LOADED_CONF = dict(acl=ACL_DICT, legal_tag=LEGAL_DICT, data_object=CONF)

# Shortcut to the "Payload" section of the run configuration.
CONF_PAYLOAD = CONF["Payload"]
class DAG_RUN:
    """Minimal object exposing a ``conf`` attribute set to ``CONF``."""

    def __init__(self):
        self.conf = CONF
# Mapping that carries a DAG_RUN instance under the "dag_run" key.
DAG_RUN_CONF = dict(dag_run=DAG_RUN())
This diff is collapsed.
# Copyright 2020 Google LLC
# Copyright 2020 EPAM Systems
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
# Root of the fixture tree: the "data" directory next to this module.
DATA_PATH_PREFIX = "{}/data".format(os.path.dirname(__file__))

# Individual fixture files, all resolved below DATA_PATH_PREFIX.
MANIFEST_GENERIC_SCHEMA_PATH = DATA_PATH_PREFIX + "/manifests/schema_Manifest.1.0.0.json"
MANIFEST_WELLBORE_VALID_PATH = DATA_PATH_PREFIX + "/master/Wellbore.0.3.0.json"
MANIFEST_BATCH_WELLBORE_VALID_PATH = DATA_PATH_PREFIX + "/master/batch_Wellbore.0.3.0.json"
SEARCH_VALID_RESPONSE_PATH = DATA_PATH_PREFIX + "/other/SearchResponseValid.json"
# Copyright 2021 Google LLC
# Copyright 2021 EPAM Systems
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Mock providers module."""
import io
import logging
from typing import Tuple
from osdu_api.providers.blob_storage import get_client
from osdu_api.providers.credentials import get_credentials
from osdu_api.providers.factory import ProvidersFactory
from osdu_api.providers.types import BlobStorageClient, BaseCredentials
logger = logging.getLogger(__name__)
@ProvidersFactory.register("provider_test")
class MockCredentials(BaseCredentials):
    """Mock Credentials Provider.

    Holds a fixed token string and never contacts an identity service.
    """

    def __init__(self):
        self._access_token = "test_token"

    def refresh_token(self) -> str:
        """Refresh token.

        Nothing is actually refreshed: the call is logged and the currently
        stored token is handed back, so the declared ``str`` return type is
        honoured instead of implicitly returning ``None``.

        :return: Refreshed token
        :rtype: str
        """
        logger.info("Refreshed token in test.")
        return self._access_token

    @property
    def access_token(self) -> str:
        """The access token.

        :return: Access token string.
        :rtype: str
        """
        return self._access_token

    @access_token.setter
    def access_token(self, token: str):
        """Set access token.

        :param token: The access token
        :type token: str
        """
        self._access_token = token
@ProvidersFactory.register("provider_test")
class GoogleCloudStorageClient(BlobStorageClient):
    """Mock BlobStorage Provider.

    Every method is a no-op that returns ``None``.
    """

    def download_to_file(self, uri: str, file: io.BytesIO) -> Tuple[io.BytesIO, str]:
        """Download file from the given URI (no-op in this mock).

        :param uri: The full URI of the file.
        :type uri: str
        :param file: a file like object
        :type file: io.BytesIO
        :return: A tuple containing the file and its content-type
        :rtype: Tuple[io.BytesIO, str]
        """
        return None

    def download_file_as_bytes(self, uri: str) -> Tuple[bytes, str]:
        """Download file as bytes from the given URI (no-op in this mock).

        :param uri: The full URI of the file
        :type uri: str
        :return: The file as bytes and its content-type
        :rtype: Tuple[bytes, str]
        """
        return None

    def upload_file(self, uri: str, file: io.BytesIO, content_type: str):
        """Upload blob to given URI (no-op in this mock).

        :param uri: The full target URI of the resource to upload.
        :type uri: str
        :param file: The file to upload
        :type file: FileLikeObject
        :param content_type: The content-type of the file to upload
        :type content_type: str
        """
        return None

    def does_file_exist(self, uri: str):
        """Verify if a resource exists in the given URI (no-op in this mock).

        :param uri: The URI of the resource to verify
        :type uri: str
        """
        return None
def get_test_credentials():
    """Utility to get the credentials to use in tests.

    :return: whatever ``get_credentials`` yields for ``"provider_test"``
    """
    credentials = get_credentials("provider_test")
    return credentials
def get_test_blob_storage_client():
    """Utility to get blob storage client to use in tests.

    :return: whatever ``get_client`` yields for ``"provider_test"``
    """
    client = get_client("provider_test")
    return client
# Copyright 2020 Google LLC
# Copyright 2020 EPAM Systems
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import json
import http
import requests
class MockResponse(requests.Response):
    """
    Response stub for monkey patching requests' methods.

    Example usage: monkeypatch.setattr(
        requests, "get", lambda *args, **kwargs: MockResponse(http.HTTPStatus.OK)
    )
    """

    def __init__(self, status_code: http.HTTPStatus):
        super().__init__()
        # Fixed placeholder values; only the status code varies per test.
        self.url = "Test"
        self.reason = "Test"
        self.status_code = status_code

    @property
    def text(self):
        """Always ``None``: this stub carries no body text."""
        return None
class MockWorkflowResponse(MockResponse):
    """MockResponse variant whose ``json()`` returns a preset value."""

    def __init__(self, json: str = "", status_code: http.HTTPStatus = http.HTTPStatus.OK):
        # ``json`` is the caller-visible keyword name; it is stored as-is.
        super().__init__(status_code)
        self._json = json

    def json(self):
        """Return the value given to the constructor."""
        payload = self._json
        return payload
# Copyright 2020 Google LLC
# Copyright 2020 EPAM Systems
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import http
import json
import os
import sys
from datetime import datetime
from functools import lru_cache
from typing import ClassVar, Tuple, Type, TypeVar

import pytest
import requests
from airflow import DAG
from airflow.models import TaskInstance

# Extend the import path with the Airflow "plugins" and "dags" directories
# before importing the project modules below.
sys.path.append(f"{os.getenv('AIRFLOW_SRC_DIR')}/plugins")
sys.path.append(f"{os.getenv('AIRFLOW_SRC_DIR')}/dags")

from osdu_api.libs.exceptions import PipelineFailedError
import mock_providers
from file_paths import (
    MANIFEST_WELLBORE_VALID_PATH,
    SEARCH_VALID_RESPONSE_PATH, MANIFEST_GENERIC_SCHEMA_PATH, MANIFEST_BATCH_WELLBORE_VALID_PATH)
from operators.process_manifest_r3 import ProcessManifestOperatorR3, SchemaValidator, \
    ManifestProcessor
from operators.update_status import UpdateStatusOperator
from osdu_api.libs.handle_file import FileHandler
from mock_responses import MockWorkflowResponse
# Type variable used in the annotations of the task-building test helpers.
CustomOperator = TypeVar("CustomOperator")
class MockDagRun:
    """Bare object that stores the given value as its ``conf`` attribute."""

    def __init__(self, conf):
        # Stored by reference; no copy is made.
        self.conf = conf
class MockStorageResponse(requests.Response):
    """Response stub whose ``json()`` always reports one record id."""

    def json(self, **kwargs):
        """Return a fixed payload; keyword arguments are ignored."""
        record_ids = ["test"]
        return {"recordIds": record_ids}
class TestOperators(object):
    """Tests that run the manifest and update-status operators on fixtures."""

    def _build_task(self, operator: Type[CustomOperator], conf_path: str,
                    dag_id: str) -> Tuple[CustomOperator, dict]:
        """Instantiate ``operator`` inside a fresh DAG and build its context.

        :param operator: Operator class to instantiate.
        :param conf_path: Path of the JSON file loaded as the run ``conf``.
        :param dag_id: Identifier given to the DAG.
        :return: The task and its template context, with ``dag_run``
            replaced by a ``MockDagRun`` carrying the loaded ``conf``.
        """
        with open(conf_path) as f:
            conf = json.load(f)
        dag = DAG(dag_id=dag_id, start_date=datetime.now())
        task: CustomOperator = operator(dag=dag, task_id='anytask')
        ti = TaskInstance(task=task, execution_date=datetime.now())
        context = ti.get_template_context()
        context["dag_run"] = MockDagRun(conf)
        return task, context

    def _create_batch_task(self, operator: Type[CustomOperator]) -> Tuple[CustomOperator, dict]:
        """Build a task whose run ``conf`` is the batch Wellbore manifest."""
        return self._build_task(operator, MANIFEST_BATCH_WELLBORE_VALID_PATH,
                                'batch_osdu_ingest')

    def _create_task(self, operator: Type[CustomOperator]) -> Tuple[CustomOperator, dict]:
        """Build a task whose run ``conf`` is the single Wellbore manifest."""
        return self._build_task(operator, MANIFEST_WELLBORE_VALID_PATH, 'Osdu_ingest')

    @staticmethod
    def _load_manifest_schema(*args, **kwargs):
        """Load the generic manifest schema from disk; arguments are ignored."""
        with open(MANIFEST_GENERIC_SCHEMA_PATH) as f:
            manifest_schema = json.load(f)
        return manifest_schema

    def _patch_manifest_processing(self, monkeypatch, get_schema, validate_manifest):
        """Replace schema lookup, validation, storage and upload with stubs.

        :param monkeypatch: The pytest ``monkeypatch`` fixture.
        :param get_schema: Replacement for ``SchemaValidator.get_schema``.
        :param validate_manifest: Replacement for
            ``SchemaValidator.validate_manifest``.
        """
        monkeypatch.setattr(SchemaValidator, "get_schema", get_schema)
        monkeypatch.setattr(SchemaValidator, "_validate_against_schema",
                            lambda *args, **kwargs: None)
        monkeypatch.setattr(SchemaValidator, "validate_manifest", validate_manifest)
        monkeypatch.setattr(ManifestProcessor, "save_record_to_storage",
                            lambda obj, headers, request_data: MockStorageResponse())
        monkeypatch.setattr(FileHandler, "upload_file",
                            lambda *args, **kwargs: "test")

    def test_process_manifest_r3_operator(self, monkeypatch):
        """Run ProcessManifestOperatorR3 on the single Wellbore manifest."""
        # NOTE(review): this stub returns the entities alone, while the batch
        # test returns ``(entities, [])`` -- confirm which shape the operator
        # under test expects.
        self._patch_manifest_processing(
            monkeypatch,
            get_schema=lru_cache()(self._load_manifest_schema),
            validate_manifest=lambda obj, entities: entities,
        )
        task, context = self._create_task(ProcessManifestOperatorR3)
        task.pre_execute(context)
        task.execute(context)

    def test_process_manifest_r3_operator_batch(self, monkeypatch):
        """Run ProcessManifestOperatorR3 on the batch Wellbore manifest."""
        self._patch_manifest_processing(
            monkeypatch,
            get_schema=self._load_manifest_schema,
            validate_manifest=lambda obj, entities: (entities, []),
        )
        task, context = self._create_batch_task(ProcessManifestOperatorR3)
        task.pre_execute(context)
        task.execute(context)

    def _test_update_status_operator(self, monkeypatch, status: UpdateStatusOperator.prev_ti_state):
        """Run UpdateStatusOperator with the previous-task status forced.

        :param monkeypatch: The pytest ``monkeypatch`` fixture.
        :param status: Value returned by the patched
            ``get_previous_ti_statuses``.
        """
        monkeypatch.setattr(UpdateStatusOperator, "get_previous_ti_statuses",
                            lambda obj, context: status)
        monkeypatch.setattr(requests, "put", lambda *args, **kwargs: MockWorkflowResponse(
            status_code=http.HTTPStatus.OK, json="test"))
        task, context = self._create_task(UpdateStatusOperator)
        task.pre_execute(context)
        task.execute(context)

    @pytest.mark.parametrize(
        "status",
        [
            pytest.param(
                UpdateStatusOperator.prev_ti_state.NONE
            ),
            pytest.param(
                UpdateStatusOperator.prev_ti_state.SUCCESS
            )
        ]
    )
    def test_update_status_operator(self, monkeypatch, status):
        """The operator runs without raising for NONE and SUCCESS statuses."""
        self._test_update_status_operator(monkeypatch, status)

    @pytest.mark.parametrize(
        "status",
        [
            pytest.param(
                UpdateStatusOperator.prev_ti_state.FAILED
            )
        ]
    )
    def test_update_status_operator_failed(self, monkeypatch, status):
        """
        Test if operator raises PipelineFailedError if any previous task failed.
        """
        with pytest.raises(PipelineFailedError):
            self._test_update_status_operator(monkeypatch, status)
......@@ -25,7 +25,7 @@ pip install azure-keyvault-secrets
pip install msal
pip install python-keycloak
pip install osdu-api==0.10.1.dev0+92014f64 --extra-index-url https://community.opengroup.org/api/v4/projects/148/packages/pypi/simple
pip install osdu-airflow --extra-index-url=https://community.opengroup.org/api/v4/projects/668/packages/pypi/simple
pip install osdu-airflow==0.0.1.dev31+59e58330 --extra-index-url=https://community.opengroup.org/api/v4/projects/668/packages/pypi/simple
export WORKFLOW_URL="http://127.0.0.1:5000"
export UPDATE_STATUS_URL="http://127.0.0.1:5000/wf/us"
export STORAGE_URL="http://127.0.0.1:5000/st"
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment