Commit 120d9c0e authored by Spencer Sutton's avatar Spencer Sutton
Browse files

manifest client parsing

parent e51df0e1
import json
import uuid
import re
from typing import List
from osdu_api.base_client import BaseClient
from osdu_api.storage.record_client import RecordClient
from osdu_api.model.record import Record
from osdu_api.model.http_method import HttpMethod
'''
Holds the logic for ingesting a manifest as records
'''
class ManifestClient(BaseClient):
'''
Inheriting from BaseClient to get needed config values
'''
def __init__(self):
super(ManifestClient, self).__init__()
self.record_client = RecordClient()
'''
Parses a manifest into its distinct records and uses the record client
to ingest those records. It requires that the kind for the record be known in
addition to legal tags, acl_viewers, and acl_owners.
Example of manifest: {
"WorkProduct": {
"ResourceTypeID": "srn:type:work-product/WellboreTrajectory:",
"ResourceSecurityClassification": "srn:reference-data/ResourceSecurityClassification:RESTRICTED:",
"Data": {
"GroupTypeProperties": {
"Description": None,
"Schema": None,
"RequireKey": None,
"Components": []
},
"IndividualTypeProperties": {
"Name": "2244.csv",
"Description": "Wellbore Trajectory"
},
"ExtensionProperties": {}
},
"ComponentsAssociativeIDs": ["wpc-1"]
},
"WorkProductComponents": [{
"ResourceTypeID": "srn:type:work-product-component/WellboreTrajectory:",
"ResourceSecurityClassification": "srn:reference-data/ResourceSecurityClassification:RESTRICTED:",
"Data": {
"GroupTypeProperties": {
"Description": None,
"Schema": None,
"RequireKey": None,
"Files": [],
"Artefacts": []
},
"IndividualTypeProperties": {
"Name": "2244.csv",
"Description": "Wellbore Trajectory",
"WellboreID": "srn:master-data/Wellbore:2244:"
},
"ExtensionProperties": {}
},
"AssociativeID": "wpc-1",
"FileAssociativeIDs": ["f-1"]
}],
"Files": [{
"AssociativeID": "f-1",
"ResourceTypeID": "srn:type:file/csv:",
"Data": {
"GroupTypeProperties": {
"PreLoadFilePath": "s3://aws-osdu-demo-r1/data/provided/trajectory_csvs/2244.csv",
"TempWorkflowLocation": "s3://aws-osdu-demo-r1/data/provided/trajectory_csvs/2244.csv",
"FileSource": ""
},
"IndividualTypeProperties": {},
"ExtensionProperties": {}
},
"ResourceSecurityClassification": "srn:reference-data/ResourceSecurityClassification:RESTRICTED:"
}]
}
'''
def ingest_manifest(self, manifest: dict, kind: str, legal_tags: list,
acl_viewers: list, acl_owners: list):
if not manifest or not kind or not legal_tags or not acl_viewers or not acl_owners:
raise Exception('Missing required parameter from this list: manifest, kind, legal_tags, acl_viewers, acl_owners')
wp_response = self._create_work_product_parent_record(manifest, kind, legal_tags, acl_viewers, acl_owners)
if wp_response.status_code != 201:
raise Exception('Problem creating work product record, status: %s, message: %s' % (wp_response.status_code, wp_response.content))
record_id = self._get_first_record_id_from_response(wp_response)
record_versions = self.record_client.get_record_versions(record_id)
response = self._create_child_records(manifest, kind, legal_tags, acl_viewers, acl_owners, ['%s:%s' % (record_id, record_versions[0])])
if response.status_code != 201:
raise Exception('Problem WPCs\' and files\' records, status: %s, message: %s' % (response.status_code, response.content))
return {'Work Product Response': wp_response, 'Work Product Components and Files Response': response}
def _get_first_record_id_from_response(self, response):
content = json.loads(response.content)
return content['recordIds'][0]
'''
Helper method for ingesting a manifest. Work products need to be created first so the record id is known
for WPCs' and Files' ancestry
'''
def _create_work_product_parent_record(self, manifest: dict, kind: str, legal_tags: list, acl_viewers: list, acl_owners: list):
work_product = manifest['WorkProduct']
work_product['ResourceID'] = self._generate_resource_id(work_product['ResourceTypeID'])
record_id = self._generate_record_id(work_product['ResourceID'])
record = self._populate_request_body(record_id, work_product['Data'], kind, legal_tags, acl_viewers, acl_owners, [])
response = self.record_client.create_update_records_from_dict([record])
return response
'''
Helper method for ingesting a manifest. Creates the rest of a manifest not including work products
'''
def _create_child_records(self, manifest: dict, kind: str, legal_tags: list, acl_viewers: list, acl_owners: list, parents: list):
child_records = []
for wpc in manifest['WorkProductComponents']:
wpc['ResourceID'] = self._generate_resource_id(wpc['ResourceTypeID'])
record_id = self._generate_record_id(wpc['ResourceID'])
child_records.append(self._populate_request_body(record_id, wpc['Data'], kind, legal_tags, acl_viewers, acl_owners, parents))
for file in manifest["Files"]:
file['ResourceID'] = self._generate_resource_id(file['ResourceTypeID'])
record_id = self._generate_record_id(file['ResourceID'])
child_records.append(self._populate_request_body(record_id, file['Data'], kind, legal_tags, acl_viewers, acl_owners, parents))
return self.record_client.create_update_records_from_dict(child_records)
'''
Helper method that puts together the request object that gets passed along to the
record client to send off to storage
'''
def _populate_request_body(self, record_id: str, data, kind: str, legal_tags: list, acl_viewers: list, acl_owners: list, parents: list):
request_body = '{"kind":"",' \
'"id":"",' \
'"legal":{"legaltags":[],' \
'"otherRelevantDataCountries":["US"],' \
'"status":"compliant"},' \
'"acl":{"viewers":[],"owners":[]},' \
'"ancestry":{ "parents":[]},' \
'"meta":[{}],' \
'"data":{}}'
request = json.loads(request_body)
request['kind'] = kind
request['id'] = record_id
request['data'] = data
request['legal']['legaltags'] = legal_tags
request['acl']['viewers'] = acl_viewers
request['acl']['owners'] = acl_owners
request['ancestry']['parents'] = parents
return request
'''
Generates a resource id from the resource type id. Taken from os-manifest-load
'''
def _generate_resource_id(self, type_id):
return '{0}{1}:'.format(type_id.replace('type:',''), re.sub(r"\D", "", str(uuid.uuid4())))
'''
Generates a valid record id from an srn
'''
def _generate_record_id(self, resource_id):
resource_type_name = resource_id.split('/')[-1]
resource_type_pieces = resource_id.split(':')
resource_uuid = ''
resource_type_name = ''
try:
resource_type_name = resource_type_pieces[1].replace('/', '-') #.split('/')[0]
except:
raise Exception('Unable to get resource type name from resource id')
try:
resource_uuid = resource_type_pieces[2]
except:
raise Exception('Unable to get uuid from resource id')
return '%s:%s:%s' % (self.data_partition_id, resource_type_name, resource_uuid)
storage_url: https://d2o4pfwlgp5525.cloudfront.net/api/storage/v2/records
search_url: http://localhost:8085/api/search/v2/query
data_partition_id: opendes
# dynamically injects what provider-specific logic to use
provider: aws
entitlements_module_name: entitlements_client
\ No newline at end of file
storage_url: https://d2o4pfwlgp5525.cloudfront.net/api/storage/v2/records
storage_url: http://localhost:8081/api/storage/v2/records
search_url: http://localhost:8085/api/search/v2/query
data_partition_id: opendes
data_partition_id: stubbed_partition
# dynamically injects what provider-specific logic to use
provider: aws
provider: stubbed_provider
entitlements_module_name: entitlements_client
\ No newline at end of file
import pytest
import mock
import json
import unittest
import types
from unittest.mock import MagicMock
from osdu_api.storage.record_client import RecordClient
from osdu_api.base_client import BaseClient
from osdu_api.ingest.manifest_client import ManifestClient
class TestManifestClient(unittest.TestCase):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.test_manifest = {
"WorkProduct": {
"ResourceTypeID": "srn:type:work-product/WellboreTrajectory:",
"ResourceSecurityClassification": "srn:reference-data/ResourceSecurityClassification:RESTRICTED:",
"Data": {
"GroupTypeProperties": {
"Description": None,
"Schema": None,
"RequireKey": None,
"Components": []
},
"IndividualTypeProperties": {
"Name": "2244.csv",
"Description": "Wellbore Trajectory"
},
"ExtensionProperties": {}
},
"ComponentsAssociativeIDs": ["wpc-1"]
},
"WorkProductComponents": [{
"ResourceTypeID": "srn:type:work-product-component/WellboreTrajectory:",
"ResourceSecurityClassification": "srn:reference-data/ResourceSecurityClassification:RESTRICTED:",
"Data": {
"GroupTypeProperties": {
"Description": None,
"Schema": None,
"RequireKey": None,
"Files": [],
"Artefacts": []
},
"IndividualTypeProperties": {
"Name": "2244.csv",
"Description": "Wellbore Trajectory",
"WellboreID": "srn:master-data/Wellbore:2244:"
},
"ExtensionProperties": {}
},
"AssociativeID": "wpc-1",
"FileAssociativeIDs": ["f-1"]
}],
"Files": [{
"AssociativeID": "f-1",
"ResourceTypeID": "srn:type:file/csv:",
"Data": {
"GroupTypeProperties": {
"PreLoadFilePath": "s3://aws-osdu-demo-r1/data/provided/trajectory_csvs/2244.csv",
"TempWorkflowLocation": "s3://aws-osdu-demo-r1/data/provided/trajectory_csvs/2244.csv",
"FileSource": ""
},
"IndividualTypeProperties": {},
"ExtensionProperties": {}
},
"ResourceSecurityClassification": "srn:reference-data/ResourceSecurityClassification:RESTRICTED:"
}]
}
self.work_product_record = {
'acl': {'owners': ['data.test1@opendes.testing.com'], 'viewers': ['data.test1@opendes.testing.com']},
'ancestry': {'parents': []},
'data': {
"GroupTypeProperties": {
"Description": None,
"Schema": None,
"RequireKey": None,
"Components": []
},
"IndividualTypeProperties": {
"Name": "2244.csv",
"Description": "Wellbore Trajectory"
},
"ExtensionProperties": {}
},
'id': 'opendes:work-product-WellboreTrajectory:7078798401807363498',
'kind': 'opendes:work-product:WellboreTrajectory:1.0.0',
'legal': {'legaltags': ['opendes-manifest-default'],
'otherRelevantDataCountries': ['US'],
'status': 'compliant'},
'meta': [{}]
}
@mock.patch.object(BaseClient, '_get_bearer_token', return_value="stubbed")
@mock.patch.object(BaseClient, 'make_request', return_value="response")
@mock.patch.object(BaseClient, '_parse_config', return_value="stubbed")
@mock.patch.object(RecordClient, 'get_record_versions', return_value=[555])
def test_ingest_manifest(self, get_bearer_token_mock, make_request_mock, parse_config_mock, get_record_versions_mock):
# Arrange
manifest_client = ManifestClient()
manifest_client.data_partition_id = 'test'
manifest_client.storage_url = 'stubbed url'
manifest_client.record_client.storage_url = 'http://stubbedurl.url'
manifest_client._get_first_record_id_from_response = MagicMock(return_value = 123)
kind = 'opendes:work-product:WellboreTrajectory:1.0.0'
legal_tags = ['opendes-manifest-default']
acl_owners = ['data.test1@opendes.testing.com']
acl_viewers = ['data.test1@opendes.testing.com']
response = mock.Mock()
response.status_code = 201
response.content = '{"recordIds": [123]}'
manifest_client._create_work_product_parent_record = MagicMock(return_value = response)
manifest_client._create_child_records = MagicMock(return_value = response)
# Act
response = manifest_client.ingest_manifest(self.test_manifest, kind, legal_tags, acl_owners, acl_viewers)
# Assert
manifest_client._create_work_product_parent_record.assert_called_with(self.test_manifest, kind, legal_tags, acl_owners, acl_viewers)
manifest_client._create_child_records.assert_called_with(self.test_manifest, kind, legal_tags, acl_owners, acl_viewers, ['123:555'])
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment