Commit 9ed23c4a authored by Spencer Sutton's avatar Spencer Sutton
Browse files

Merge branch 'consolidation' into 'master'

Consolidated libs code

See merge request !12
parents d142383a 3832374a
......@@ -6,3 +6,4 @@ __pycache__
**/venv/**
**/.idea/**
.vscode/
.DS_Store
\ No newline at end of file
......@@ -26,6 +26,7 @@ env:
DATA_WORKFLOW_BASE_URL: stub
FILE_DMS_BASE_URL: stub
DATASET_REGISTRY_BASE_URL: stub
SCHEMA_BASE_URL: stub
AUTH_TOKEN_URL: stub
CUSTOM_SCOPE: stub
ENVIRONMENT: stub
......@@ -53,7 +54,7 @@ phases:
# publish new artifact to code artifact
- aws codeartifact login --tool twine --domain osdu-dev --domain-owner 888733619319 --repository osdu-python
- python setup.py sdist bdist_wheel
- twine upload --skip-existing --repository codeartifact dist/osdu_api-0.0.4.tar.gz
- twine upload --skip-existing --repository codeartifact dist/osdu_api-0.0.5.tar.gz
artifacts:
......
# Copyright 2020 Google LLC
# Copyright © 2020 Amazon Web Services
# Copyright © 2020 Amazon Web Services
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys, os
import importlib
from configparser import SafeConfigParser
import requests
from osdu_api.libs.configuration.config_manager import ConfigManager
from osdu_api.libs.context.context import Context
from osdu_api.libs.exceptions.exceptions import UnknownRequestMethodError, ConfigurationError
from osdu_api.libs.auth.authorization import TokenRefresher, authorize
from osdu_api.model.http_method import HttpMethod
'''
Base client that is meant to be extended by service specific clients
'''
class BaseClient:
"""
Base client that is meant to be extended by service specific clients.
"""
def __init__(self, token_refresher: TokenRefresher, context: Context):
self._config_manager = ConfigManager()
self.token_refresher = token_refresher
self.data_partition_id = context.data_partition_id
'''
Base client gets initialized with configuration values and a bearer token
based on provider-specific logic
'''
def __init__(self):
self._parse_config()
self.unauth_retries = 0
if self.use_service_principal == 'True' or self.use_service_principal == 'true':
self._refresh_service_principal_token()
'''
Example config file:
[environment]
data_partition_id=opendes
storage_url=https://[STORAGE_ENDPOINT]/api/storage/v2
search_url=https://[SEARCH_ENDPOINT]/api/search/v2
data_workflow_url=https://[WORKFLOW_ENDPOINT]/api/data-workflow/v1
file_dms_url=https://[FILE_DMS_ENDPOINT]/api/filedms/v2
dataset_registry_url=https://[DATASET_REGISTRY_URL]/api/dataset-registry/v1
def get_config_value(self, value: str) -> str:
return self._config_manager.get_config_value(value)
[provider]
name=aws
entitlements_module_name=entitlements_client
'''
def _parse_config(self):
config_parser = SafeConfigParser(os.environ)
config_file_name = 'osdu_api.ini'
found_names = config_parser.read(config_file_name)
if config_file_name not in found_names:
raise Exception('Could not find osdu_api.ini config file')
@authorize()
def _send_request(self, headers: dict, url: str, params: dict, data: str, method: HttpMethod) -> requests.Response:
if method is HttpMethod.GET:
response = requests.get(url=url, params=params, headers=headers)
elif method is HttpMethod.POST:
response = requests.post(url=url, params=params, data=data, headers=headers)
elif method is HttpMethod.PUT:
response = requests.put(url=url, params=params, data=data, headers=headers)
else:
raise UnknownRequestMethodError
return response
self.data_partition_id = config_parser.get('environment', 'data_partition_id')
self.storage_url = config_parser.get('environment', 'storage_url')
self.search_url = config_parser.get('environment', 'search_url')
self.data_workflow_url = config_parser.get('environment', 'data_workflow_url')
self.file_dms_url = config_parser.get('environment', 'file_dms_url')
self.legal_url = config_parser.get('environment', 'legal_url')
self.entitlements_url = config_parser.get('environment', 'entitlements_url')
self.dataset_url = config_parser.get('environment', 'dataset_url')
self.use_service_principal = config_parser.get('environment', 'use_service_principal')
self.provider = config_parser.get('provider', 'name')
self.service_principal_module_name = config_parser.get('provider', 'service_principal_module_name')
def make_request(
self,
method: HttpMethod,
url: str,
data: str = '',
add_headers: dict = None,
params: dict = None
) -> requests.Response:
"""
Makes a request using python's built in requests library. Takes additional headers if
necessary
"""
params = params or {}
add_headers = add_headers or {}
'''
The path to the logic to get a valid bearer token is dynamically injected based on
what provider and entitlements module name is provided in the configuration yaml
'''
def _refresh_service_principal_token(self):
entitlements_client = importlib.import_module('osdu_api.providers.%s.%s' % (self.provider, self.service_principal_module_name))
self.service_principal_token = entitlements_client.get_service_principal_token()
'''
Makes a request using python's built in requests library. Takes additional headers if
necessary
'''
def make_request(self, method: HttpMethod, url: str, data = '', add_headers = {}, params = {}, bearer_token = None):
if bearer_token is not None and 'Bearer ' not in bearer_token:
bearer_token = 'Bearer ' + bearer_token
headers = {
'content-type': 'application/json',
'data-partition-id': self.data_partition_id,
'Authorization': bearer_token if bearer_token is not None else self.service_principal_token
}
for key, value in add_headers.items():
headers[key] = value
response = self._send_request(headers, url, params, data, method)
if (len(add_headers) > 0):
for key, value in add_headers:
headers[key] = value
response = object
if (method == HttpMethod.GET):
response = requests.get(url=url, params=params, headers=headers, verify=False)
elif (method == HttpMethod.DELETE):
response = requests.delete(url=url, params=params, headers=headers, verify=False)
elif (method == HttpMethod.POST):
response = requests.post(url=url, params=params, data=data, headers=headers, verify=False)
elif (method == HttpMethod.PUT):
response = requests.put(url=url, params=params, data=data, headers=headers, verify=False)
if (response.status_code == 401 or response.status_code == 403) and self.unauth_retries < 1:
if self.use_service_principal == 'True' or self.use_service_principal == 'true':
self.unauth_retries += 1
self._refresh_service_principal_token()
self.make_request(method, url, data, add_headers, params, None)
self.unauth_retries = 0
return response
......@@ -63,7 +63,7 @@ class BaseClient:
self.entitlements_url = config_parser.get('environment', 'entitlements_url')
self.dataset_url = config_parser.get('environment', 'dataset_url')
self.use_service_principal = config_parser.get('environment', 'use_service_principal')
self.schema_url = config_parser.get('environment', 'schema_url')
self.provider = config_parser.get('provider', 'name')
self.service_principal_module_name = config_parser.get('provider', 'service_principal_module_name')
......@@ -90,7 +90,7 @@ class BaseClient:
}
if (len(add_headers) > 0):
for key, value in add_headers:
for key, value in add_headers.items():
headers[key] = value
response = object
......@@ -112,4 +112,4 @@ class BaseClient:
self.unauth_retries = 0
return response
\ No newline at end of file
return response
......@@ -23,8 +23,8 @@ class DatasetDmsClient(BaseClient):
Holds the logic for interfacing with Data Registry Service's DMS api
"""
def get_storage_instructions(self, resource_type_id: str, bearer_token=None):
params = {'resourceType': resource_type_id}
def get_storage_instructions(self, kind_sub_type: str, bearer_token=None):
params = {'kindSubType': kind_sub_type}
return self.make_request(method=HttpMethod.GET, url='{}{}'.format(self.dataset_url, '/getStorageInstructions'),
params=params, bearer_token=bearer_token)
......
# Copyright © Microsoft Corporation
# Copyright © 2020 Amazon Web Services
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import json
from typing import List
from osdu_api.clients.base_client import BaseClient
from osdu_api.model.http_method import HttpMethod
#from osdu_api.model.storage.schema.schema import Schema
#from osdu_api.model.storage.schema.schema_attribute import SchemaAttribute
import os
import importlib
import importlib.util
from urllib.request import Request, urlopen
from urllib.parse import urlencode
from urllib.error import HTTPError
from json import loads
def get_bearer_token():
client_id = os.environ['client_id']
client_secret = os.environ['client_secret']
tenant_id = os.environ['tenant_id']
token_endpoint = os.environ['token_endpoint']
resource = os.environ['resource']
class SchemaClient(BaseClient):
"""
Holds the logic for interfacing with Schema API
"""
def get_schema_by_id(self, schema_id: str, bearer_token=None):
return self.make_request(method=HttpMethod.GET, url = '{}/{}/{}'.format(self.schema_url, 'schema', schema_id),
data={}, bearer_token=bearer_token)
body = {
"grant_type": "client_credentials",
"client_id": client_id,
"client_secret": client_secret,
"resource":resource
}
headers = {
"Content-Type": "application/x-www-form-urlencoded"
}
data = urlencode(body).encode("utf8")
request = Request(url=token_endpoint, data=data, headers=headers)
try:
response = urlopen(request)
response_body = response.read()
resp = loads(response_body)
token = resp["access_token"]
except HTTPError:
raise
return f'Bearer {token}'
# Copyright © 2020 Amazon Web Services
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
[environment]
data_partition_id=opendes
storage_url=%(BASE_URL)s/api/storage/v2
search_url=%(BASE_URL)s/api/search/v2
legal_url=%(BASE_URL)s/api/legal/v1
data_workflow_url=%(BASE_URL)s/api/data-workflow/v1
file_dms_url=%(BASE_URL)s/api/filedms/v2
dataset_url=%(BASE_URL)s/api/dataset-registry/v1
entitlements_url=%(BASE_URL)s/api/entitlements/v1
use_service_principal=False
[provider]
name=aws
service_principal_module_name=service_principal_util
token_url_ssm_path=/osdu/%(ENVIRONMENT)s/oauth-token-uri
aws_oauth_custom_scope_ssm_path=/osdu/%(ENVIRONMENT)s/oauth-custom-scope
client_id_ssm_path=/osdu/%(ENVIRONMENT)s/client-credentials-client-id
client_secret_name=/osdu/%(ENVIRONMENT)s/client_credentials_secret
client_secret_dict_key=client_credentials_client_secret
region_name=%(AWS_REGION)s
# Copyright © 2020 Amazon Web Services
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
[environment]
data_partition_id=opendes
storage_url=%(BASE_URL)s/api/storage/v2
search_url=%(BASE_URL)s/api/search/v2
legal_url=%(BASE_URL)s/api/legal/v1
data_workflow_url=%(BASE_URL)s/api/data-workflow/v1
file_dms_url=%(BASE_URL)s/api/filedms/v2
dataset_url=%(BASE_URL)s/api/dataset-registry/v1
entitlements_url=%(BASE_URL)s/api/entitlements/v1
schema_url=%(BASE_URL)s/api/schema-service/v1
use_service_principal=True
[provider]
name=aws
service_principal_module_name=service_principal_util
token_url_ssm_path=/osdu/%(ENVIRONMENT)s/oauth-token-uri
aws_oauth_custom_scope_ssm_path=/osdu/%(ENVIRONMENT)s/oauth-custom-scope
client_id_ssm_path=/osdu/%(ENVIRONMENT)s/client-credentials-client-id
client_secret_name=/osdu/%(ENVIRONMENT)s/client_credentials_secret
client_secret_dict_key=client_credentials_client_secret
region_name=%(AWS_REGION)s
......@@ -20,7 +20,7 @@ from functools import partial
from http import HTTPStatus
import requests
from osdu_api.libs.exceptions.exceptions import TokenRefresherNotPresentError
from osdu_api.libs.exceptions import TokenRefresherNotPresentError
logger = logging.getLogger()
......
......@@ -17,7 +17,7 @@ import enum
import os
import yaml
from osdu_api.libs.exceptions.exceptions import ConfigurationError
from osdu_api.libs.exceptions import ConfigurationError
"""
This module is used for initializing configurations, such as OSDU API endpoints, vendor info etc.
......
# Copyright 2020 Google LLC
# Copyright © 2020 Amazon Web Services
# Copyright 2020 EPAM Systems
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......@@ -13,26 +13,27 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import json
from typing import List
from osdu_api.base_client import BaseClient
from osdu_api.model.http_method import HttpMethod
from osdu_api.model.search.query_response import QueryResponse
"""Constants module."""
'''
Holds the logic for interfacing with Search's query api
'''
class SearchClient(BaseClient):
RETRIES = 3
TIMEOUT = 1
WAIT = 10
'''
Used to hit search's api endpoint "queryRecords"
'''
def query_records_from_dict(self, query_request: dict):
query_request_data = json.dumps(query_request)
FIRST_STORED_RECORD_INDEX = 0
response = self.make_request(method=HttpMethod.POST, url=self.search_url, data=query_request_data)
response_content = json.loads(response.content)
query_response = QueryResponse(response_content['results'], response_content['aggregations'])
return query_response
# Paths to extend schema fields with surrogate keys
DATA_TYPES_WITH_SURROGATE_KEYS = ("dataset", "work-product", "work-product-component")
SURROGATE_KEYS_PATHS = [
("definitions", "{{data-partition-id}}:wks:AbstractWPCGroupType:1.0.0", "properties", "Datasets",
"items"),
("definitions", "osdu:wks:AbstractWPCGroupType:1.0.0", "properties", "Artefacts",
"items", "properties", "ResourceID"),
("properties", "data", "allOf", 1, "properties", "Components", "items"),
]
DATA_SECTION = "Data"
DATASETS_SECTION = "Datasets"
MASTER_DATA_SECTION ="MasterData"
REFERENCE_DATA_SECTION ="ReferenceData"
WORK_PRODUCT_SECTION = "WorkProduct"
WORK_PRODUCT_COMPONENTS_SECTION = "WorkProductComponents"
......@@ -13,23 +13,27 @@
# See the License for the specific language governing permissions and
# limitations under the License.
"""Context module."""
import dataclasses
@dataclasses.dataclass
class Context(object):
"""
Store data-partition-id and AppKey passed via Payload field of dagrun.conf.
"""
class Context:
"""Class to store data-partition-id and AppKey."""
data_partition_id: str
app_key: str
@classmethod
def populate(cls, ctx: dict) -> 'Context':
"""
Populates Context dataclass from dagrun.conf dict.
:return: populated Context
:rtype: Context
"""
ctx_payload = ctx.pop('Payload')
ctx_obj = cls(
app_key=ctx_payload['AppKey'],
data_partition_id=ctx_payload['data-partition-id']
)
ctx_obj = cls(app_key=ctx_payload['AppKey'],
data_partition_id=ctx_payload['data-partition-id'])
return ctx_obj
# Copyright 2020 Google LLC
# Copyright 2020 EPAM Systems
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Exceptions module."""
class RecordsNotSearchableError(Exception):
"""Raise when expected totalCount of records differs from actual one."""
pass
class PipelineFailedError(Exception):
"""Raise when pipeline failed."""
pass
class EmptyManifestError(Exception):
"""Raise when manifest field is empty."""
pass
class GetSchemaError(Exception):
"""Raise when can't find schema."""
pass
class SRNNotFound(Exception):
"""Raise when can't find SRN."""
pass
class NotOSDUSchemaFormatError(Exception):
"""Raise when schema doesn't correspond OSDU format."""
pass
class FileSourceError(Exception):
"""Raise when file doesn't exist under given URI path."""
pass
class UploadFileError(Exception):
"""Raise when there is an error while uploading a file into OSDU."""
class TokenRefresherNotPresentError(Exception):
"""Raise when token refresher is not present in "refresh_token' decorator."""
pass
class NoParentEntitySystemSRNError(Exception):
"""Raise when parent entity doesn't have system-generated SRN."""
pass
class NoParentEntitySystemSRNError(Exception):
"""
Raise when parent entity doesn't have system-generated SRN.
"""
pass
class InvalidFileRecordData(Exception):
"""Raise when file data does not contain mandatory fields."""
class GenericManifestSchemaError(Exception):
"""Raise when a generic manifest schema is invalid."""
# Copyright 2020 Google LLC
# Copyright 2020 EPAM Systems
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""This module provides cloud specific File Handler implementations."""
import dataclasses
import io
import json
import logging
import uuid
from typing import List, Tuple, TypeVar
import requests
import tenacity
from osdu_api.libs.constants import RETRIES, WAIT
from osdu_api.libs.context import Context
from osdu_api.libs.exceptions import InvalidFileRecordData
from osdu_api.libs.mixins import HeadersMixin
from osdu_api.libs.auth.authorization import TokenRefresher, authorize
from osdu_api.providers import blob_storage
from osdu_api.providers.types import BlobStorageClient, FileLikeObject
logger = logging.getLogger()
RETRY_SETTINGS = {
"stop": tenacity.stop_after_attempt(RETRIES),
"wait": tenacity.wait_fixed(WAIT),
}
@dataclasses.dataclass
class FileUploadUrlResponse:
"""Simple class to store File service uploadURL response values."""
file_id: str
signed_url: str
file_source: str