From f3b279fb2c47221a58a0e5ebb48724425d1df80f Mon Sep 17 00:00:00 2001 From: Luc Yriarte Date: Fri, 28 Aug 2020 11:27:08 +0200 Subject: [PATCH 1/5] os-core-lib-python-gcp snapshot @331210f --- .bumpversion.cfg | 6 + .gitignore | 110 +++++++++++ LICENSE.TXT | 177 +++++++++++++++++ README.md | 13 ++ osdu_gcp/__init__.py | 9 + osdu_gcp/storage/__init__.py | 0 osdu_gcp/storage/auth_gcp_sa.py | 101 ++++++++++ osdu_gcp/storage/blob_storage_gcp.py | 262 +++++++++++++++++++++++++ osdu_gcp/storage/http_client_gcp.py | 102 ++++++++++ requirements.txt | 7 + requirements_dev.txt | 3 + setup.py | 57 ++++++ tests/__init__.py | 0 tests/conftest.py | 6 + tests/storage/__init__.py | 0 tests/storage/test_blob_storage_gcp.py | 116 +++++++++++ tests/test_utils.py | 5 + 17 files changed, 974 insertions(+) create mode 100644 .bumpversion.cfg create mode 100644 .gitignore create mode 100644 LICENSE.TXT create mode 100644 README.md create mode 100644 osdu_gcp/__init__.py create mode 100644 osdu_gcp/storage/__init__.py create mode 100644 osdu_gcp/storage/auth_gcp_sa.py create mode 100644 osdu_gcp/storage/blob_storage_gcp.py create mode 100644 osdu_gcp/storage/http_client_gcp.py create mode 100644 requirements.txt create mode 100644 requirements_dev.txt create mode 100644 setup.py create mode 100644 tests/__init__.py create mode 100644 tests/conftest.py create mode 100644 tests/storage/__init__.py create mode 100644 tests/storage/test_blob_storage_gcp.py create mode 100644 tests/test_utils.py diff --git a/.bumpversion.cfg b/.bumpversion.cfg new file mode 100644 index 0000000..d867e66 --- /dev/null +++ b/.bumpversion.cfg @@ -0,0 +1,6 @@ +[bumpversion] +current_version = 0.2.3 +commit = True +tag = False + +[bumpversion:file:osdu_gcp/__init__.py] diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..6690683 --- /dev/null +++ b/.gitignore @@ -0,0 +1,110 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# ide +.idea/ + +# C extensions +*.so + 
+# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# pyenv +.python-version + +# celery beat schedule file +celerybeat-schedule + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +.venv* +env/ +venv/ +venv*/ +ENV/ +env.bak/ +venv.bak/ +.virtualenv/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ diff --git a/LICENSE.TXT b/LICENSE.TXT new file mode 100644 index 0000000..4947287 --- /dev/null +++ b/LICENSE.TXT @@ -0,0 +1,177 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. 
For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..a1fac72 --- /dev/null +++ b/README.md @@ -0,0 +1,13 @@ +# osdu-core-lib-python-gcp + +This is the python package for osdu google cloud implementations. + +## testing + +You need valid gcp credentials and a project ID to run the unit tests. 
# osdu_gcp/storage/auth_gcp_sa.py
"""Service-account access-token provider for Google APIs.

Based on the gcloud-aio project:
https://github.com/talkiq/gcloud-aio/blob/master/auth/gcloud/aio/auth/token.py
"""
import aiohttp
import json
import datetime
import time
from typing import List, Optional
from urllib.parse import quote_plus
from urllib.parse import urlencode

import jwt

from osdu.core.auth import AuthBase

# Refresh this many seconds before the advertised expiry so a token handed
# to a caller is never on the verge of expiring.
_EXPIRY_SAFETY_MARGIN_SECONDS = 300
# Upper bound on how long a cached token is trusted; matches the historic
# hard-coded 3000 s behaviour for the common expires_in=3600 case.
_MAX_CACHED_TOKEN_AGE_SECONDS = 3000


class GoogleAccountAuth(AuthBase):
    """Fetches and caches OAuth2 access tokens for a GCP service account.

    The service-account private key signs a JWT assertion which is exchanged
    at the token endpoint for a short-lived bearer access token
    (``urn:ietf:params:oauth:grant-type:jwt-bearer`` flow).
    """

    _scheme: str = 'Bearer'

    def __init__(self,
                 service_file: str,
                 session: aiohttp.ClientSession,
                 scopes: List[str] = None):
        """
        :param service_file: path to the service-account JSON key file
        :param session: aiohttp session used for the token exchange
        :param scopes: OAuth2 scopes (space-joined into the JWT assertion)
        """
        super().__init__()
        with open(service_file, 'r') as f:
            self.service_data = json.load(f)

        self.session = session
        self.scopes = ' '.join(scopes or [])
        self.access_token: Optional[str] = None
        self.access_token_duration = 0  # 'expires_in' of the last token (seconds)
        self.access_token_acquired_at = datetime.datetime(1970, 1, 1)

    @property
    def scheme(self) -> Optional[str]:
        """Return the authorization scheme ('Bearer')."""
        return self._scheme

    async def header_value(self) -> str:
        """Return a ready-to-use ``Authorization`` header value."""
        token = await self._get_token()
        return f'{self._scheme} {token}'

    async def token(self) -> str:
        """Return a valid access token, refreshing it first if needed."""
        return await self._get_token()

    async def __call__(self):
        """Return the (scheme, credentials/token) tuple."""
        token = await self._get_token()
        return self._scheme, token

    @property
    def email(self) -> Optional[str]:
        """Return the service-account email used as the JWT issuer."""
        return self.service_data['client_email']

    async def _refresh_token_for_service_account(self):
        """Exchange a freshly signed JWT assertion for a new access token."""
        token_uri = self.service_data.get('token_uri', 'https://oauth2.googleapis.com/token')

        now = int(time.time())
        assertion_payload = {
            'aud': token_uri,
            'exp': now + 3600,
            'iat': now,
            'iss': self.service_data['client_email'],
            'scope': self.scopes,
        }

        # N.B. algorithm='RS256' requires an extra 240MB in dependencies...
        assertion = jwt.encode(assertion_payload,
                               self.service_data['private_key'],
                               algorithm='RS256')
        payload = urlencode({
            'assertion': assertion,
            'grant_type': 'urn:ietf:params:oauth:grant-type:jwt-bearer',
        }, quote_via=quote_plus)

        refresh_headers = {'Content-Type': 'application/x-www-form-urlencoded'}

        async with self.session.post(token_uri, data=payload, headers=refresh_headers) as resp:
            if resp.status != 200:
                # BUG FIX: previously raised a bare ``Exception()`` with no
                # message, making auth failures impossible to diagnose.
                body = await resp.text()
                raise RuntimeError(
                    f'Token refresh failed with HTTP {resp.status} '
                    f'from {token_uri}: {body}')
            content = await resp.json()

        self.access_token = str(content['access_token'])
        self.access_token_duration = int(content['expires_in'])
        self.access_token_acquired_at = datetime.datetime.utcnow()

    async def _get_token(self) -> str:
        """Return the cached token while still fresh, else refresh first.

        Honours the server-advertised lifetime (minus a safety margin)
        instead of blindly assuming 3600 s tokens; the cap keeps behaviour
        identical to the previous hard-coded 3000 s window for the common
        expires_in=3600 case.
        """
        if self.access_token:
            now = datetime.datetime.utcnow()
            age = (now - self.access_token_acquired_at).total_seconds()
            max_age = min(self.access_token_duration - _EXPIRY_SAFETY_MARGIN_SECONDS,
                          _MAX_CACHED_TOKEN_AGE_SECONDS)
            if age < max_age:
                return self.access_token

        await self._refresh_token_for_service_account()
        return self.access_token
# osdu_gcp/storage/blob_storage_gcp.py
"""Async Google Cloud Storage backend for the OSDU blob-storage abstraction.

Based on https://github.com/talkiq/gcloud-aio/blob/master/storage/gcloud/aio/storage/storage.py
"""
import enum
import io
import mimetypes
import os
from typing import Any, Optional, Tuple, List
from urllib.parse import quote
from asyncio import sleep

from aiohttp import ClientResponseError
from osdu.core.api.storage.blob_storage_base import BlobStorageBase
from osdu.core.api.storage.blob import Blob

from .http_client_gcp import *
from .auth_gcp_sa import GoogleAccountAuth

try:
    import ujson as json
except ImportError:
    import json  # type: ignore

API_ROOT = 'https://www.googleapis.com/storage/v1/b'
API_ROOT_UPLOAD = 'https://www.googleapis.com/upload/storage/v1/b'
MAX_CONTENT_LENGTH_SIMPLE_UPLOAD = 5 * 1024 * 1024  # 5 MB


class UploadType(enum.Enum):
    """GCS JSON API upload strategies."""
    SIMPLE = 1     # single-request 'media' upload, small payloads
    RESUMABLE = 2  # two-phase resumable session, large payloads


class GCloudAioStorage(BlobStorageBase):
    """Google Cloud Storage implementation of ``BlobStorageBase``.

    Authentication is either forwarded per call (``auth`` argument) or
    derived from a service-account file given at construction time.
    """

    _scopes = ['https://www.googleapis.com/auth/devstorage.read_write']
    # Class-level cache: one token provider per (project, bucket) pair.
    _access_token_dict = {}

    def __init__(self, session=None, service_account_file: Optional[str] = None):
        """
        :param session: aiohttp client session
        :param service_account_file: fallback credentials used when no auth
            object is forwarded on a call
        """
        self._client = GCloudAioHttpClient(scopes=self._scopes, session=session)
        self._session = session
        self._service_account_file = service_account_file

    async def _get_access_token(self,
                                project: str, bucket: str,
                                forwarded_auth: Optional = None) -> str:
        """Return a bearer token, preferring a per-call forwarded auth object."""
        if forwarded_auth is not None:
            return await forwarded_auth.token()

        assert self._service_account_file, 'No credentials provided'
        token_cache = self._access_token_dict
        cache_key = f'{project}_{bucket}'
        tenant_access_token = token_cache.get(cache_key, None)
        if tenant_access_token is None:
            tenant_access_token = GoogleAccountAuth(service_file=self._service_account_file,
                                                    session=self._session,
                                                    scopes=self._scopes)
            token_cache[cache_key] = tenant_access_token

        return await tenant_access_token.token()

    async def delete(self, project: str, bucket: str, object_name: str,
                     *, auth: Optional = None, timeout: int = 10, params: dict = None, **kwargs):
        """Delete one object from the bucket."""
        # https://cloud.google.com/storage/docs/json_api/#encoding
        encoded_object_name = quote(object_name, safe='')
        url = f'{API_ROOT}/{bucket}/o/{encoded_object_name}'
        token = await self._get_access_token(project, bucket, auth)
        await self._client.delete(url,
                                  params=params or {},
                                  timeout=timeout,
                                  auth_token=token)

    async def download(self, project: str, bucket: str, object_name: str,
                       *, auth: Optional = None, timeout: int = 10, **kwargs) -> bytes:
        """Download the object's content as bytes."""
        return await self._download(project, bucket, object_name, auth=auth, timeout=timeout, params={'alt': 'media'})

    async def download_metadata(self, project: str, bucket: str, object_name: str,
                                *, auth: Optional = None, timeout: int = 10, **kwargs) -> Blob:
        """Download the object's metadata (not its content) as a ``Blob``."""
        data = await self._download(project, bucket, object_name, auth=auth, timeout=timeout)
        metadata: dict = json.loads(data.decode())
        return Blob(identifier=metadata.get('id', None),
                    bucket=metadata.get('bucket', bucket),
                    name=metadata.get('name', object_name),
                    metadata=metadata,
                    acl=metadata.get('acl', None),
                    content_type=metadata.get('contentType', None),
                    time_created=metadata.get('timeCreated', None),
                    time_updated=metadata.get('updated', None),
                    size=metadata.get('size', -1)
                    )

    async def list_objects(self, project: str, bucket: str, *,
                           auth: Optional = None, prefix: str = '', page_token: Optional[str] = None,
                           max_result: Optional[int] = None, timeout: int = 10, **kwargs) -> List[str]:
        """List object names in the bucket, optionally filtered by prefix."""
        url = f'{API_ROOT}/{bucket}/o'
        params = {'prefix': prefix}
        if page_token is not None:
            params['pageToken'] = page_token
        if max_result is not None:
            params['maxResults'] = max_result
        data, _ = await self._client.get(url, response_type=ResponseType.JSON, params=params or {}, timeout=timeout,
                                         auth_token=await self._get_access_token(project, bucket, auth))

        return [x['name'] for x in data.get('items', list())]

    @classmethod
    def build_URI(cls, bucket: str, object_name: str) -> str:
        """Return the ``gs://`` URI for an object."""
        return f'gs://{bucket}/{object_name}'

    # TODO: if `metadata` is set, use multipart upload:
    # https://cloud.google.com/storage/docs/json_api/v1/how-tos/upload
    async def upload(self, project: str, bucket: str, object_name: str, file_data: Any, *,
                     auth: Optional = None, content_type: str = None, metadata: dict = None,
                     timeout: int = 30, **kwargs) -> dict:
        """Upload ``file_data`` (bytes, str or stream) as ``object_name``.

        Chooses simple vs resumable upload based on content length.
        """
        url = f'{API_ROOT_UPLOAD}/{bucket}/o'

        stream = self._preprocess_data(file_data)
        content_length = self._get_stream_len(stream)

        # mime detection method same as in aiohttp 3.4.4
        content_type = content_type or mimetypes.guess_type(object_name)[0]

        parameters = {}
        headers = {}
        headers.update({
            'Content-Length': str(content_length),
            'Content-Type': content_type or '',
        })

        force_resumable_upload: Optional[bool] = None
        upload_type = self._decide_upload_type(force_resumable_upload, content_length)
        if upload_type == UploadType.SIMPLE:
            # NOTE: metadata is ignored for simple uploads (see TODO above).
            return await self._upload_simple(project, bucket, url, object_name, stream, parameters, headers,
                                             auth=auth, timeout=timeout)

        if upload_type == UploadType.RESUMABLE:
            return await self._upload_resumable(project, bucket, url, object_name, stream, parameters, headers,
                                                auth=auth, metadata=metadata, timeout=timeout)

        raise TypeError(f'upload type {upload_type} not supported')

    @staticmethod
    def _get_stream_len(stream: io.IOBase) -> int:
        """Return stream length in bytes without disturbing its position."""
        current = stream.tell()
        try:
            return stream.seek(0, os.SEEK_END)
        finally:
            stream.seek(current)

    @staticmethod
    def _preprocess_data(data: Any) -> io.IOBase:
        """Normalize supported payload types to a seekable stream."""
        if data is None:
            return io.StringIO('')

        if isinstance(data, bytes):
            return io.BytesIO(data)
        if isinstance(data, str):
            return io.StringIO(data)
        if isinstance(data, io.IOBase):
            return data

        raise TypeError(f'unsupported upload type: "{type(data)}"')

    @staticmethod
    def _decide_upload_type(force_resumable_upload: Optional[bool],
                            content_length: int) -> UploadType:
        """Pick the upload strategy; caller may force either mode."""
        # force resumable
        if force_resumable_upload is True:
            return UploadType.RESUMABLE

        # force simple
        if force_resumable_upload is False:
            return UploadType.SIMPLE

        # decide based on Content-Length
        if content_length > MAX_CONTENT_LENGTH_SIMPLE_UPLOAD:
            return UploadType.RESUMABLE

        return UploadType.SIMPLE

    @staticmethod
    def _split_content_type(content_type: str) -> Tuple[str, Optional[str]]:
        """Split 'type; charset=enc' into (type, enc); enc may be None."""
        content_type_and_encoding_split = content_type.split(';')
        content_type = content_type_and_encoding_split[0].lower().strip()

        encoding = None
        if len(content_type_and_encoding_split) > 1:
            encoding_str = content_type_and_encoding_split[1].lower().strip()
            encoding = encoding_str.split('=')[-1]

        return content_type, encoding

    async def _download(self, project: str, bucket: str, object_name: str,
                        *, auth: Optional = None, params: dict = None, timeout: int = 10) -> bytes:
        """GET an object (content or metadata, per ``params``) as bytes."""
        # https://cloud.google.com/storage/docs/json_api/#encoding
        encoded_object_name = quote(object_name, safe='')
        url = f'{API_ROOT}/{bucket}/o/{encoded_object_name}'
        token = await self._get_access_token(project, bucket, auth)
        data, _ = await self._client.get(url, response_type=ResponseType.BYTES,
                                         params=params or {}, timeout=timeout, auth_token=token)
        # N.B. the GCS API sometimes returns 'application/octet-stream' when a
        # string was uploaded. To avoid potential weirdness, always return a
        # bytes object.
        return data

    async def _upload_simple(self, project: str, bucket: str, url: str, object_name: str,
                             stream: io.IOBase, params: dict, headers: dict,
                             *, auth: Optional = None, timeout: int = 30) -> dict:
        """Single-request media upload."""
        # https://cloud.google.com/storage/docs/json_api/v1/how-tos/simple-upload
        params['name'] = object_name
        params['uploadType'] = 'media'

        headers.update({'Accept': 'application/json'})
        token = await self._get_access_token(project, bucket, auth)
        data, _ = await self._client.post(url, data=stream, response_type=ResponseType.JSON, headers=headers,
                                          params=params, timeout=timeout, auth_token=token)
        return data

    async def _upload_resumable(self, project: str, bucket: str, url: str, object_name: str,
                                stream: io.IOBase, params: dict,
                                headers: dict, *, metadata: dict = None,
                                auth: Optional = None, timeout: int = 30) -> dict:
        """Two-phase resumable upload: open a session, then PUT the data."""
        # https://cloud.google.com/storage/docs/json_api/v1/how-tos/resumable-upload
        session_uri = await self._initiate_upload(project, bucket, url, object_name, params, headers,
                                                  auth=auth, metadata=metadata)
        data: dict = await self._do_upload(project, bucket, session_uri, stream,
                                           auth=auth, headers=headers, timeout=timeout)
        return data

    async def _initiate_upload(self, project: str, bucket: str, url: str, object_name: str, params: dict,
                               headers: dict, *, auth: Optional = None, metadata: dict = None) -> str:
        """Open a resumable-upload session and return its session URI."""
        params['uploadType'] = 'resumable'

        metadict = (metadata or {}).copy()
        metadict.update({'name': object_name})
        metadata = json.dumps(metadict)

        post_headers = headers.copy()
        post_headers.update({
            'X-Upload-Content-Type': headers['Content-Type'],
            'X-Upload-Content-Length': headers['Content-Length']
        })
        token = await self._get_access_token(project, bucket, auth)
        _, response_headers = await self._client.post(url, response_type=ResponseType.NONE, headers=post_headers,
                                                      params=params, data=metadata, timeout=10,
                                                      auth_token=token)
        session_uri: str = response_headers['Location']
        return session_uri

    async def _do_upload(self, project: str, bucket: str, session_uri: str, stream: io.IOBase,
                         headers: dict, *, auth: Optional = None, retries: int = 5, **kwargs) -> dict:
        """PUT the stream to the session URI, retrying with backoff on errors.

        :raises aiohttp.ClientResponseError: when all ``retries`` attempts
            fail. BUG FIX: previously the loop fell through after the last
            failure and implicitly returned ``None``, silently swallowing the
            error (and sleeping once more for nothing).
        """
        for tries in range(retries):
            try:
                token = await self._get_access_token(project, bucket, auth)
                data, _ = await self._client.put(session_uri, response_type=ResponseType.JSON, headers=headers,
                                                 data=stream, auth_token=token, **kwargs)
                return data
            except ClientResponseError:
                if tries == retries - 1:
                    raise
                # '*/*' asks the server to report the persisted offset;
                # back off exponentially before retrying.
                headers.update({'Content-Range': '*/*'})
                await sleep(2. ** tries)
# osdu_gcp/storage/http_client_gcp.py
"""Minimal async HTTP helper used by the GCS storage implementation.

Wraps an aiohttp-compatible session: adds bearer-token auth headers,
serializes request bodies and decodes responses.
"""
import enum
from typing import Any, List, Optional, Tuple, Union, TYPE_CHECKING

if TYPE_CHECKING:
    # Only needed for (quoted) type annotations, so defer the import to
    # type-checking time instead of making multidict a hard runtime dependency.
    from multidict import CIMultiDictProxy

try:
    import ujson as json
except ImportError:
    import json  # type: ignore

__all__ = ['ResponseType', 'GCloudAioHttpClient']


class ResponseType(enum.Enum):
    """How the HTTP response body should be decoded."""
    NONE = 1   # discard the body, return None
    JSON = 2   # decode as JSON into a dict
    TEXT = 3   # decode as str
    BYTES = 4  # raw bytes


class GCloudAioHttpClient:
    """Thin helper around an aiohttp session for Google API calls."""

    def __init__(self, scopes: List[str], session):
        """
        :param scopes: OAuth2 scopes (kept for reference by callers)
        :param session: aiohttp-compatible client session
        """
        self._scopes: List[str] = scopes
        self._session = session

    def get_session(self):
        """Return the underlying session."""
        return self._session

    async def _http_call(self, session_method, url: str, *,
                         response_type: ResponseType = ResponseType.JSON,
                         auth_token=None,
                         data: Any = None,
                         headers: Optional[dict] = None,
                         **kwargs: Any) -> Tuple[Union[dict, bytes, str, None], 'CIMultiDictProxy[str]']:
        """Issue one request through ``session_method`` and decode the reply.

        :param session_method: bound session verb, e.g. ``session.post``
        :param url: target URL
        :param response_type: how to decode the response body
        :param auth_token: bearer token added as an Authorization header
        :param data: request body; a dict is JSON-encoded, a str is UTF-8
            encoded, anything else is passed through (caller must then
            provide Content-Type)
        :returns: tuple of (decoded body or None, response headers)
        :raises aiohttp.ClientResponseError: on any non-2xx status
        """
        headers = headers or {}

        if auth_token is not None:
            headers['Authorization'] = f'Bearer {auth_token}'

        if data is None:
            headers['Content-Length'] = '0'
        elif isinstance(data, dict):
            data = json.dumps(data).encode('utf-8')
            # BUG FIX: these were written as ``headers['Content-Length']: ...``
            # — an annotation statement, a no-op — so the header was never set.
            headers['Content-Length'] = str(len(data))
            headers['Content-Type'] = 'application/json; charset=UTF-8'
        elif isinstance(data, str):
            data = data.encode('utf-8')
            headers['Content-Length'] = str(len(data))  # BUG FIX: was a no-op annotation
            headers['Content-Type'] = 'text/html; charset=UTF-8'
        else:
            # Opaque payload (e.g. a stream): the caller must supply the type.
            assert 'Content-Type' in headers

        async with session_method(url, data=data, headers=headers, **kwargs) as resp:
            resp.raise_for_status()
            if response_type == ResponseType.JSON:
                return await resp.json(), resp.headers
            if response_type == ResponseType.TEXT:
                return await resp.text(), resp.headers
            if response_type == ResponseType.BYTES:
                return await resp.read(), resp.headers
            return None, resp.headers

    async def get(self, url: str, *,
                  response_type: ResponseType = ResponseType.JSON,
                  headers: Optional[dict] = None,
                  auth_token=None,
                  **kwargs: Any) -> Tuple[Union[dict, bytes, str, None], 'CIMultiDictProxy[str]']:
        """GET ``url`` and return (decoded body, response headers)."""
        session = self.get_session()
        return await self._http_call(session.get, url, response_type=response_type, data=None, headers=headers,
                                     auth_token=auth_token, **kwargs)

    async def post(self, url: str, *,
                   response_type: ResponseType = ResponseType.JSON,
                   data: Any = None,
                   headers: Optional[dict] = None,
                   auth_token=None,
                   **kwargs: Any) -> Tuple[Union[dict, bytes, str, None], 'CIMultiDictProxy[str]']:
        """POST ``data`` to ``url`` and return (decoded body, response headers)."""
        session = self.get_session()
        return await self._http_call(session.post, url, response_type=response_type, data=data, headers=headers,
                                     auth_token=auth_token, **kwargs)

    async def put(self, url: str, *,
                  response_type: ResponseType = ResponseType.JSON,
                  data: Any = None,
                  headers: Optional[dict] = None,
                  auth_token=None,
                  **kwargs: Any) -> Tuple[Union[dict, bytes, str, None], 'CIMultiDictProxy[str]']:
        """PUT ``data`` to ``url`` and return (decoded body, response headers)."""
        session = self.get_session()
        return await self._http_call(session.put, url, response_type=response_type, data=data, headers=headers,
                                     auth_token=auth_token, **kwargs)

    async def delete(self, url: str, headers: Optional[dict] = None, auth_token=None, **kwargs: Any) -> str:
        """DELETE ``url`` and return the response body as text."""
        headers = headers or {}
        if auth_token is not None:
            headers['Authorization'] = f'Bearer {auth_token}'

        session = self.get_session()

        async with session.delete(url, headers=headers, **kwargs) as resp:
            resp.raise_for_status()
            return await resp.text()
tools used here + python_requires='>=3.7', +) diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..ba2677b --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,6 @@ +from .test_utils import from_env + + +# cloud provider dependent configurations +TESTING_GCP_DATA_PROJECT_CREDENTIALS = from_env('TESTING_GCP_DATA_PROJECT_CREDENTIALS') +TESTING_GCP_DATA_PROJECT_ID = from_env('TESTING_GCP_DATA_PROJECT_ID') diff --git a/tests/storage/__init__.py b/tests/storage/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/storage/test_blob_storage_gcp.py b/tests/storage/test_blob_storage_gcp.py new file mode 100644 index 0000000..f2c0ffe --- /dev/null +++ b/tests/storage/test_blob_storage_gcp.py @@ -0,0 +1,116 @@ +from tests.conftest import * +import tempfile +import shutil +from osdu_gcp.storage.blob_storage_gcp import GCloudAioStorage +import pytest +import aiohttp +import uuid + + +class _TESTING_CFG: + project_id = TESTING_GCP_DATA_PROJECT_ID + credentials = TESTING_GCP_DATA_PROJECT_CREDENTIALS + bucket_name = 'testing-osdu-core' + + +TEST_DATA = { + 'initial_files': [ + # object_name , content + ('test_file_1.txt', 'content of test file 1'), + ('test_file_2.txt', 'content of test file 2') + ] +} + + +# test local and gcp storage +@pytest.fixture(params=['GCloudAioStorage']) +async def storage_client(request): + client_name = request.param + if client_name == 'GCloudAioStorage': + session = aiohttp.ClientSession() + yield GCloudAioStorage(session=session, service_account_file=_TESTING_CFG.credentials) + await session.close() + + +@pytest.fixture +async def temp_directory() -> str: + tmpdir = tempfile.mkdtemp() + yield tmpdir + + # teardown - recursively delete the tmp directory + shutil.rmtree(tmpdir, ignore_errors=True) + +@pytest.mark.asyncio +async def test_list_objects(storage_client): + result = await 
storage_client.list_objects(_TESTING_CFG.project_id, _TESTING_CFG.bucket_name) + for file_name, _ in TEST_DATA['initial_files']: + assert file_name in result + + +@pytest.mark.asyncio +async def test_download(storage_client): + name, expected_content = TEST_DATA['initial_files'][0] + data = await storage_client.download(_TESTING_CFG.project_id, _TESTING_CFG.bucket_name, name) + result = data.decode('utf-8') + assert result == expected_content + + +@pytest.mark.asyncio +async def test_upload_check_delete(storage_client): + name = str(uuid.uuid4()) + content = str(uuid.uuid4()) + + # upload + await storage_client.upload(_TESTING_CFG.project_id, _TESTING_CFG.bucket_name, name, content, content_type='text/plain') + + # check single object with this name + result = await storage_client.list_objects(_TESTING_CFG.project_id, _TESTING_CFG.bucket_name, prefix=name) + assert result == [name] + + # check its content + data = await storage_client.download(_TESTING_CFG.project_id, _TESTING_CFG.bucket_name, name) + assert data.decode('utf-8') == content + + # delete it + await storage_client.delete(_TESTING_CFG.project_id, _TESTING_CFG.bucket_name, name) + + # check nothing remains + result = await storage_client.list_objects(_TESTING_CFG.project_id, _TESTING_CFG.bucket_name, prefix=name) + assert len(result) == 0 + + +@pytest.mark.asyncio +async def test_upload_from_various_type(storage_client, temp_directory): + file_c = temp_directory + '\\testing.file' + content_bin = b'expected content 123456789' + content_str = content_bin.decode('utf-8') + with open(file_c, 'w') as f: + f.write('expected content 123456789') + + with open(file_c, 'rb') as file_bin_input: + await storage_client.upload(_TESTING_CFG.project_id, _TESTING_CFG.bucket_name, 'file_bin_input', file_bin_input) + assert await storage_client.download(_TESTING_CFG.project_id, _TESTING_CFG.bucket_name, 'file_bin_input') == content_bin + + with open(file_c, 'r') as file_txt_input: + await 
storage_client.upload(_TESTING_CFG.project_id, _TESTING_CFG.bucket_name, 'file_txt_input', file_txt_input) + assert await storage_client.download(_TESTING_CFG.project_id, _TESTING_CFG.bucket_name, 'file_txt_input') == content_bin + + await storage_client.upload(_TESTING_CFG.project_id, _TESTING_CFG.bucket_name, 'str_input', content_str) + assert await storage_client.download(_TESTING_CFG.project_id, _TESTING_CFG.bucket_name, 'str_input') == content_bin + + await storage_client.upload(_TESTING_CFG.project_id, _TESTING_CFG.bucket_name, 'bin_input', content_bin) + assert await storage_client.download(_TESTING_CFG.project_id, _TESTING_CFG.bucket_name, 'bin_input') == content_bin + + await storage_client.upload(_TESTING_CFG.project_id, _TESTING_CFG.bucket_name, 'empty_input', None) + actual_data = await storage_client.download(_TESTING_CFG.project_id, _TESTING_CFG.bucket_name, 'empty_input') + assert len(actual_data) == 0 + + with pytest.raises(TypeError): + await storage_client.upload(_TESTING_CFG.project_id, _TESTING_CFG.bucket_name, 'int_input', 123456) + + + + + + + diff --git a/tests/test_utils.py b/tests/test_utils.py new file mode 100644 index 0000000..cde52ba --- /dev/null +++ b/tests/test_utils.py @@ -0,0 +1,5 @@ +def from_env(key, default=None): + import os + result = os.environ.get(key, default) + # assert result, "Failed to get {} env variable".format(key) + return result -- GitLab From 938529f2bed8d43f433e6053fec0e2d3f46fb0ca Mon Sep 17 00:00:00 2001 From: Ethiraj Krishnamanaidu Date: Tue, 1 Sep 2020 18:17:47 -0500 Subject: [PATCH 2/5] Initial SLB code push --- .bumpversion.cfg | 6 + .gitignore | 110 +++++++++++ README.md | 9 + osdu_gcp/__init__.py | 9 + osdu_gcp/storage/__init__.py | 0 osdu_gcp/storage/auth_gcp_sa.py | 101 ++++++++++ osdu_gcp/storage/blob_storage_gcp.py | 262 +++++++++++++++++++++++++ osdu_gcp/storage/http_client_gcp.py | 102 ++++++++++ requirements.txt | 7 + requirements_dev.txt | 3 + setup.py | 57 ++++++ tests/__init__.py | 0 
tests/conftest.py | 6 + tests/storage/__init__.py | 0 tests/storage/test_blob_storage_gcp.py | 116 +++++++++++ tests/test_utils.py | 5 + 16 files changed, 793 insertions(+) create mode 100644 .bumpversion.cfg create mode 100644 .gitignore create mode 100644 osdu_gcp/__init__.py create mode 100644 osdu_gcp/storage/__init__.py create mode 100644 osdu_gcp/storage/auth_gcp_sa.py create mode 100644 osdu_gcp/storage/blob_storage_gcp.py create mode 100644 osdu_gcp/storage/http_client_gcp.py create mode 100644 requirements.txt create mode 100644 requirements_dev.txt create mode 100644 setup.py create mode 100644 tests/__init__.py create mode 100644 tests/conftest.py create mode 100644 tests/storage/__init__.py create mode 100644 tests/storage/test_blob_storage_gcp.py create mode 100644 tests/test_utils.py diff --git a/.bumpversion.cfg b/.bumpversion.cfg new file mode 100644 index 0000000..da586ff --- /dev/null +++ b/.bumpversion.cfg @@ -0,0 +1,6 @@ +[bumpversion] +current_version = 0.2.4 +commit = True +tag = False + +[bumpversion:file:osdu_gcp/__init__.py] diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..6690683 --- /dev/null +++ b/.gitignore @@ -0,0 +1,110 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# ide +.idea/ + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# pyenv +.python-version + +# celery beat schedule file +celerybeat-schedule + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +.venv* +env/ +venv/ +venv*/ +ENV/ +env.bak/ +venv.bak/ +.virtualenv/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ diff --git a/README.md b/README.md index 4ded4d5..795f68b 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,11 @@ # wellbore-gcp-lib +## testing + +You need valid gcp credentials and a project ID to run the unit tests. 
+ +```bash +export TESTING_GCP_DATA_PROJECT_CREDENTIALS='path_to_jwt' +export TESTING_GCP_DATA_PROJECT_ID='xxxx' +pytest +``` diff --git a/osdu_gcp/__init__.py b/osdu_gcp/__init__.py new file mode 100644 index 0000000..25927d0 --- /dev/null +++ b/osdu_gcp/__init__.py @@ -0,0 +1,9 @@ +__version__ = '0.2.4' + +# MUSE NOT BE CHANGED MANUALLY +# The patch number (the last one) should only be updated by the build/deploy script, if changed manually +# To increase the minor or major version: +# $> pip install -U bumpversion +# $> bumpversion major | minor +# then $> git push --tag +# may fail on windows try without commit: bumpversion part --no-commit diff --git a/osdu_gcp/storage/__init__.py b/osdu_gcp/storage/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/osdu_gcp/storage/auth_gcp_sa.py b/osdu_gcp/storage/auth_gcp_sa.py new file mode 100644 index 0000000..4345e4a --- /dev/null +++ b/osdu_gcp/storage/auth_gcp_sa.py @@ -0,0 +1,101 @@ +import aiohttp +import json +from typing import List, Optional +import datetime +import time +import jwt +from urllib.parse import quote_plus +from urllib.parse import urlencode +from osdu.core.auth import AuthBase + + +class GoogleAccountAuth(AuthBase): + _scheme: str = 'Bearer' + """ + Based on gcloud aio project https://github.com/talkiq/gcloud-aio/blob/master/auth/gcloud/aio/auth/token.py + """ + def __init__(self, + service_file: str, + session: aiohttp.ClientSession, + scopes: List[str] = None): + """ + :param service_file: service account file + :param session: aio http session + :param scopes: scopes + """ + super().__init__() + with open(service_file, 'r') as f: + self.service_data = json.load(f) + + self.session = session + self.scopes = ' '.join(scopes or []) + self.access_token: Optional[str] = None + self.access_token_duration = 0 + self.access_token_acquired_at = datetime.datetime(1970, 1, 1) + + @property + def scheme(self) -> Optional[str]: + """ Return scheme """ + return self._scheme + + async def 
header_value(self) -> str: + token = await self._get_token() + return f'{self._scheme} {token}' + + async def token(self) -> str: + return await self._get_token() + + async def __call__(self): + """ return tuple scheme, credentials/token """ + token = await self._get_token() + return self._scheme, token + + @property + def email(self) -> Optional[str]: + """ Return email used in token, if available """ + return self.service_data['client_email'] + + async def _refresh_token_for_service_account(self): + token_uri = self.service_data.get('token_uri', 'https://oauth2.googleapis.com/token') + + now = int(time.time()) + assertion_payload = { + 'aud': token_uri, + 'exp': now + 3600, + 'iat': now, + 'iss': self.service_data['client_email'], + 'scope': self.scopes, + } + + # N.B. algorithm='RS256' requires an extra 240MB in dependencies... + assertion = jwt.encode(assertion_payload, + self.service_data['private_key'], + algorithm='RS256') + payload = urlencode({ + 'assertion': assertion, + 'grant_type': 'urn:ietf:params:oauth:grant-type:jwt-bearer', + }, quote_via=quote_plus) + + refresh_headers = {'Content-Type': 'application/x-www-form-urlencoded'} + + async with self.session.post(token_uri, data=payload, headers=refresh_headers) as resp: + if resp.status != 200: + raise Exception() #TODO + content = await resp.json() + + # print('from token ------------') + # print(content) + + self.access_token = str(content['access_token']) + self.access_token_duration = int(content['expires_in']) + self.access_token_acquired_at = datetime.datetime.utcnow() + + async def _get_token(self) -> str: + if self.access_token: + now = datetime.datetime.utcnow() + delta = (now - self.access_token_acquired_at).total_seconds() + if delta < 3000: + return self.access_token + + await self._refresh_token_for_service_account() + return self.access_token diff --git a/osdu_gcp/storage/blob_storage_gcp.py b/osdu_gcp/storage/blob_storage_gcp.py new file mode 100644 index 0000000..a82afc6 --- /dev/null 
+++ b/osdu_gcp/storage/blob_storage_gcp.py @@ -0,0 +1,262 @@ +# Based on https://github.com/talkiq/gcloud-aio/blob/master/storage/gcloud/aio/storage/storage.py + +import enum +import io +import mimetypes +import os +from typing import Any, Optional, Tuple, List +from urllib.parse import quote +from asyncio import sleep +from .http_client_gcp import * +from aiohttp import ClientResponseError +from osdu.core.api.storage.blob_storage_base import BlobStorageBase +from osdu.core.api.storage.blob import Blob +from .auth_gcp_sa import GoogleAccountAuth + + +try: + import ujson as json +except ImportError: + import json # type: ignore + +API_ROOT = 'https://www.googleapis.com/storage/v1/b' +API_ROOT_UPLOAD = 'https://www.googleapis.com/upload/storage/v1/b' +MAX_CONTENT_LENGTH_SIMPLE_UPLOAD = 5 * 1024 * 1024 # 5 MB +# log = logging.getLogger(__name__) + + +class UploadType(enum.Enum): + SIMPLE = 1 + RESUMABLE = 2 + + +class GCloudAioStorage(BlobStorageBase): + _scopes = ['https://www.googleapis.com/auth/devstorage.read_write'] + _access_token_dict = {} + + def __init__(self, session=None, service_account_file: Optional[str] = None): + self._client = GCloudAioHttpClient(scopes=self._scopes, session=session) + self._session = session + self._service_account_file = service_account_file + + async def _get_access_token(self, + project: str, bucket: str, + forwarded_auth: Optional = None) -> str: + if forwarded_auth is not None: + return await forwarded_auth.token() + + assert self._service_account_file, 'No credentials provided' + token_cache = self._access_token_dict + cache_key = f'{project}_{bucket}' + tenant_access_token = token_cache.get(cache_key, None) + if tenant_access_token is None: + tenant_access_token = GoogleAccountAuth(service_file=self._service_account_file, + session=self._session, + scopes=self._scopes) + token_cache[cache_key] = tenant_access_token + + return await tenant_access_token.token() + + async def delete(self, project: str, bucket: str, object_name: 
str, + *, auth: Optional = None, timeout: int = 10, params: dict = None, **kwargs): + # https://cloud.google.com/storage/docs/json_api/#encoding + encoded_object_name = quote(object_name, safe='') + url = f'{API_ROOT}/{bucket}/o/{encoded_object_name}' + token = await self._get_access_token(project, bucket, auth) + await self._client.delete(url, + params=params or {}, + timeout=timeout, + auth_token=token) + + async def download(self, project: str, bucket: str, object_name: str, + *, auth: Optional = None, timeout: int = 10, **kwargs) -> bytes: + return await self._download(project, bucket, object_name, auth=auth, timeout=timeout, params={'alt': 'media'}) + + async def download_metadata(self, project: str, bucket: str, object_name: str, + *, auth: Optional = None, timeout: int = 10, ** kwargs) -> Blob: + data = await self._download(project, bucket, object_name, auth=auth, timeout=timeout) + metadata: dict = json.loads(data.decode()) + return Blob(identifier=metadata.get('id', None), + bucket=metadata.get('bucket', bucket), + name=metadata.get('name', object_name), + metadata=metadata, + acl=metadata.get('acl', None), + content_type=metadata.get('contentType', None), + time_created=metadata.get('timeCreated', None), + time_updated=metadata.get('updated', None), + size=metadata.get('size', -1) + ) + + async def list_objects(self, project: str, bucket: str, *, + auth: Optional = None, prefix: str = '', page_token: Optional[str] = None, + max_result: Optional[int] = None, timeout: int = 10, **kwargs) -> List[str]: + url = f'{API_ROOT}/{bucket}/o' + params = {'prefix': prefix} + if page_token is not None: + params['pageToken'] = page_token + if max_result is not None: + params['maxResults'] = max_result + data, _ = await self._client.get(url, response_type=ResponseType.JSON, params=params or {}, timeout=timeout, + auth_token=await self._get_access_token(project, bucket, auth)) + + return [x['name'] for x in data.get('items', list())] + + @classmethod + def build_URI(cls, 
bucket: str, object_name: str) -> str: + return f'gs://{bucket}/{object_name}' + + # TODO: if `metadata` is set, use multipart upload: + # https://cloud.google.com/storage/docs/json_api/v1/how-tos/upload + async def upload(self, project: str, bucket: str, object_name: str, file_data: Any, *, + auth: Optional = None, content_type: str = None, metadata: dict = None, + timeout: int = 30, **kwargs) -> dict: + url = f'{API_ROOT_UPLOAD}/{bucket}/o' + + stream = self._preprocess_data(file_data) + content_length = self._get_stream_len(stream) + + # mime detection method same as in aiohttp 3.4.4 + content_type = content_type or mimetypes.guess_type(object_name)[0] + + parameters = {} + headers = {} + headers.update({ + 'Content-Length': str(content_length), + 'Content-Type': content_type or '', + }) + + force_resumable_upload: bool = None + upload_type = self._decide_upload_type(force_resumable_upload, content_length) + # log.debug('using %r gcloud storage upload method', upload_type) + if upload_type == UploadType.SIMPLE: + # if metadata: + # log.warning('metadata will be ignored for upload_type=Simple') + return await self._upload_simple(project, bucket, url, object_name, stream, parameters, headers, + auth=auth, timeout=timeout) + + if upload_type == UploadType.RESUMABLE: + return await self._upload_resumable(project, bucket, url, object_name, stream, parameters, headers, + auth=auth, metadata=metadata, timeout=timeout) + + raise TypeError(f'upload type {upload_type} not supported') + + @staticmethod + def _get_stream_len(stream: io.IOBase) -> int: + current = stream.tell() + try: + return stream.seek(0, os.SEEK_END) + finally: + stream.seek(current) + + @staticmethod + def _preprocess_data(data: Any) -> io.IOBase: + if data is None: + return io.StringIO('') + + if isinstance(data, bytes): + return io.BytesIO(data) + if isinstance(data, str): + return io.StringIO(data) + if isinstance(data, io.IOBase): + return data + + raise TypeError(f'unsupported upload type: 
"{type(data)}"') + + @staticmethod + def _decide_upload_type(force_resumable_upload: Optional[bool], + content_length: int) -> UploadType: + # force resumable + if force_resumable_upload is True: + return UploadType.RESUMABLE + + # force simple + if force_resumable_upload is False: + return UploadType.SIMPLE + + # decide based on Content-Length + if content_length > MAX_CONTENT_LENGTH_SIMPLE_UPLOAD: + return UploadType.RESUMABLE + + return UploadType.SIMPLE + + @staticmethod + def _split_content_type(content_type: str) -> Tuple[str, str]: + content_type_and_encoding_split = content_type.split(';') + content_type = content_type_and_encoding_split[0].lower().strip() + + encoding = None + if len(content_type_and_encoding_split) > 1: + encoding_str = content_type_and_encoding_split[1].lower().strip() + encoding = encoding_str.split('=')[-1] + + return content_type, encoding + + async def _download(self, project: str, bucket: str, object_name: str, * + , auth: Optional = None, params: dict = None, timeout: int = 10) -> bytes: + # https://cloud.google.com/storage/docs/json_api/#encoding + encoded_object_name = quote(object_name, safe='') + url = f'{API_ROOT}/{bucket}/o/{encoded_object_name}' + token = await self._get_access_token(project, bucket, auth) + data, _ = await self._client.get(url, response_type=ResponseType.BYTES, + params=params or {}, timeout=timeout, auth_token=token) + # N.B. the GCS API sometimes returns 'application/octet-stream' when a + # string was uploaded. To avoid potential weirdness, always return a + # bytes object. 
+ return data + + async def _upload_simple(self, project: str, bucket: str, url: str, object_name: str, + stream: io.IOBase, params: dict, headers: dict, + *, auth: Optional = None, timeout: int = 30) -> dict: + # https://cloud.google.com/storage/docs/json_api/v1/how-tos/simple-upload + params['name'] = object_name + params['uploadType'] = 'media' + + headers.update({'Accept': 'application/json'}) + token = await self._get_access_token(project, bucket, auth) + data, _ = await self._client.post(url, data=stream, response_type=ResponseType.JSON, headers=headers, + params=params, timeout=timeout, auth_token=token) + return data + + async def _upload_resumable(self, project: str, bucket: str, url: str, object_name: str, + stream: io.IOBase, params: dict, + headers: dict, *, metadata: dict = None, + auth: Optional = None, timeout: int = 30) -> dict: + # https://cloud.google.com/storage/docs/json_api/v1/how-tos/resumable-upload + session_uri = await self._initiate_upload(project, bucket, url, object_name, params, headers, + auth=auth, metadata=metadata) + data: dict = await self._do_upload(project, bucket, session_uri, stream, + auth=auth, headers=headers, timeout=timeout) + return data + + async def _initiate_upload(self, project: str, bucket: str, url: str, object_name: str, params: dict, + headers: dict, *, auth: Optional = None, metadata: dict = None) -> str: + params['uploadType'] = 'resumable' + + metadict = (metadata or {}).copy() + metadict.update({'name': object_name}) + metadata = json.dumps(metadict) + + post_headers = headers.copy() + post_headers.update({ + 'X-Upload-Content-Type': headers['Content-Type'], + 'X-Upload-Content-Length': headers['Content-Length'] + }) + token = await self._get_access_token(project, bucket, auth) + _, response_headers = await self._client.post(url, response_type=ResponseType.NONE, headers=post_headers, + params=params, data=metadata, timeout=10, + auth_token=token) + session_uri: str = response_headers['Location'] + return 
session_uri + + async def _do_upload(self, project: str, bucket: str, session_uri: str, stream: io.IOBase, + headers: dict, *, auth: Optional = None, retries: int = 5, **kwargs) -> dict: + + for tries in range(retries): + try: + token = await self._get_access_token(project, bucket, auth) + data, _ = await self._client.put(session_uri, response_type=ResponseType.JSON, headers=headers, + data=stream, auth_token=token, **kwargs) + return data + except ClientResponseError: + headers.update({'Content-Range': '*/*'}) + await sleep(2. ** tries) diff --git a/osdu_gcp/storage/http_client_gcp.py b/osdu_gcp/storage/http_client_gcp.py new file mode 100644 index 0000000..087d591 --- /dev/null +++ b/osdu_gcp/storage/http_client_gcp.py @@ -0,0 +1,102 @@ +from typing import Optional, List, Any, Union, Tuple +from multidict import CIMultiDictProxy +import enum + +try: + import ujson as json +except ImportError: + import json # type: ignore + +__all__ = ['ResponseType', 'GCloudAioHttpClient'] + + +class ResponseType(enum.Enum): + NONE = 1 + JSON = 2 + TEXT = 3 + BYTES = 4 + + +class GCloudAioHttpClient: + + def __init__(self, scopes: List[str], session): + self._scopes: List[str] = scopes + self._session = session + + def get_session(self): + return self._session + + async def _http_call(self, session_method, url: str, *, + response_type: ResponseType = ResponseType.JSON, + auth_token=None, + data: Any = None, + headers: Optional[dict] = None, + **kwargs: Any) -> Tuple[Union[dict, bytes, str, None], 'CIMultiDictProxy[str]']: + + headers = headers or {} + + if auth_token is not None: + headers['Authorization'] = f'Bearer {auth_token}' + + if data is None: + headers['Content-Length'] = '0' + elif type(data) is dict: + data = json.dumps(data).encode('utf-8') + headers['Content-Length']: str(len(data)) + headers['Content-Type'] = 'application/json; charset=UTF-8' + elif type(data) is str: + data = data.encode('utf-8') + headers['Content-Length']: str(len(data)) + headers['Content-Type'] 
= 'text/html; charset=UTF-8' + else: + assert 'Content-Type' in headers + + async with session_method(url, data=data, headers=headers, **kwargs) as resp: + resp.raise_for_status() + if response_type == ResponseType.JSON: + return await resp.json(), resp.headers + if response_type == ResponseType.TEXT: + return await resp.text(), resp.headers + if response_type == ResponseType.BYTES: + return await resp.read(), resp.headers + return None, resp.headers + + async def get(self, url: str, *, + response_type: ResponseType = ResponseType.JSON, + headers: Optional[dict] = None, + auth_token=None, + **kwargs: Any) -> Tuple[Union[dict, bytes, str, None], 'CIMultiDictProxy[str]']: + session = self.get_session() + return await self._http_call(session.get, url, response_type=response_type, data=None, headers=headers, + auth_token=auth_token, **kwargs) + + async def post(self, url: str, *, + response_type: ResponseType = ResponseType.JSON, + data: Any = None, + headers: Optional[dict] = None, + auth_token=None, + **kwargs: Any) -> Tuple[Union[dict, bytes, str, None], 'CIMultiDictProxy[str]']: + session = self.get_session() + return await self._http_call(session.post, url, response_type=response_type, data=data, headers=headers, + auth_token=auth_token, **kwargs) + + async def put(self, url: str, *, + response_type: ResponseType = ResponseType.JSON, + data: Any = None, + headers: Optional[dict] = None, + auth_token=None, + **kwargs: Any) -> Tuple[Union[dict, bytes, str, None], 'CIMultiDictProxy[str]']: + session = self.get_session() + return await self._http_call(session.put, url, response_type=response_type, data=data, headers=headers, + auth_token=auth_token, **kwargs) + + async def delete(self, url: str, headers: Optional[dict] = None, auth_token=None, **kwargs: Any) -> str: + headers = headers or {} + if auth_token is not None: + headers['Authorization'] = f'Bearer {auth_token}' + + session = self.get_session() + + async with session.delete(url, headers=headers, **kwargs) as 
resp: + resp.raise_for_status() + return await resp.text() diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..7c27627 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,7 @@ +# osdu core lib main python +osdu-core-lib-python>=0.3.0, <0.4 + +# for google provider +aiohttp +cryptography +pyjwt diff --git a/requirements_dev.txt b/requirements_dev.txt new file mode 100644 index 0000000..60f6a19 --- /dev/null +++ b/requirements_dev.txt @@ -0,0 +1,3 @@ +pytest>=3 +pytest-asyncio +pytest-cov diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..e8d3317 --- /dev/null +++ b/setup.py @@ -0,0 +1,57 @@ +import os +import re +from setuptools import setup, find_packages + +requirements_file = os.path.abspath( + os.path.join(os.path.dirname(__file__), 'requirements.txt') +) + +test_requirements_file = os.path.abspath( + os.path.join(os.path.dirname(__file__), 'requirements_dev.txt') +) + + +# version is unified in osdu_gcp/__init__.py +def read_version(): + regexp = re.compile(r"^__version__\W*=\W*'([\d.abrc]+)'") + init_py = os.path.join(os.path.dirname(__file__), 'osdu_gcp', '__init__.py') + with open(init_py) as f: + for line in f: + match = regexp.match(line) + if match is not None: + return match.group(1) + raise RuntimeError('Cannot find version in {}'.format(init_py)) + + +with open(requirements_file) as f: + requirements = f.read().splitlines() + +with open(test_requirements_file) as f: + test_requirements = f.read().splitlines() + +setup( + name='osdu-core-lib-python-gcp', + + version=read_version(), + + packages=find_packages(), + + author="OSDU Wellbore data domain services team", + + author_email="WellboreDDMS@slb.com", + + description="OSDU python for wellbore DMS - google cloud implementations", + + long_description=open('README.md').read(), + + include_package_data=True, + + install_requires=requirements, + + tests_require=test_requirements, + + license="Apache License, Version 2.0", + + # We need py 3.7 for some of the 
tools used here + python_requires='>=3.7', +) diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..241caac --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,6 @@ +from .test_utils import from_env + + +# cloud provider dependent configurations +TESTING_GCP_DATA_PROJECT_CREDENTIALS = from_env('TESTING_GCP_DATA_PROJECT_CREDENTIALS', 'logstore-dev-sa.json') +TESTING_GCP_DATA_PROJECT_ID = from_env('TESTING_GCP_DATA_PROJECT_ID', 'logstore-dev') diff --git a/tests/storage/__init__.py b/tests/storage/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/storage/test_blob_storage_gcp.py b/tests/storage/test_blob_storage_gcp.py new file mode 100644 index 0000000..f2c0ffe --- /dev/null +++ b/tests/storage/test_blob_storage_gcp.py @@ -0,0 +1,116 @@ +from tests.conftest import * +import tempfile +import shutil +from osdu_gcp.storage.blob_storage_gcp import GCloudAioStorage +import pytest +import aiohttp +import uuid + + +class _TESTING_CFG: + project_id = TESTING_GCP_DATA_PROJECT_ID + credentials = TESTING_GCP_DATA_PROJECT_CREDENTIALS + bucket_name = 'testing-osdu-core' + + +TEST_DATA = { + 'initial_files': [ + # object_name , content + ('test_file_1.txt', 'content of test file 1'), + ('test_file_2.txt', 'content of test file 2') + ] +} + + +# test local and gcp storage +@pytest.fixture(params=['GCloudAioStorage']) +async def storage_client(request): + client_name = request.param + if client_name == 'GCloudAioStorage': + session = aiohttp.ClientSession() + yield GCloudAioStorage(session=session, service_account_file=_TESTING_CFG.credentials) + await session.close() + + +@pytest.fixture +async def temp_directory() -> str: + tmpdir = tempfile.mkdtemp() + yield tmpdir + + # teardown - recursively delete the tmp directory + shutil.rmtree(tmpdir, ignore_errors=True) + +@pytest.mark.asyncio +async def test_list_objects(storage_client): + 
result = await storage_client.list_objects(_TESTING_CFG.project_id, _TESTING_CFG.bucket_name) + for file_name, _ in TEST_DATA['initial_files']: + assert file_name in result + + +@pytest.mark.asyncio +async def test_download(storage_client): + name, expected_content = TEST_DATA['initial_files'][0] + data = await storage_client.download(_TESTING_CFG.project_id, _TESTING_CFG.bucket_name, name) + result = data.decode('utf-8') + assert result == expected_content + + +@pytest.mark.asyncio +async def test_upload_check_delete(storage_client): + name = str(uuid.uuid4()) + content = str(uuid.uuid4()) + + # upload + await storage_client.upload(_TESTING_CFG.project_id, _TESTING_CFG.bucket_name, name, content, content_type='text/plain') + + # check single object with this name + result = await storage_client.list_objects(_TESTING_CFG.project_id, _TESTING_CFG.bucket_name, prefix=name) + assert result == [name] + + # check its content + data = await storage_client.download(_TESTING_CFG.project_id, _TESTING_CFG.bucket_name, name) + assert data.decode('utf-8') == content + + # delete it + await storage_client.delete(_TESTING_CFG.project_id, _TESTING_CFG.bucket_name, name) + + # check nothing remains + result = await storage_client.list_objects(_TESTING_CFG.project_id, _TESTING_CFG.bucket_name, prefix=name) + assert len(result) == 0 + + +@pytest.mark.asyncio +async def test_upload_from_various_type(storage_client, temp_directory): + file_c = temp_directory + '\\testing.file' + content_bin = b'expected content 123456789' + content_str = content_bin.decode('utf-8') + with open(file_c, 'w') as f: + f.write('expected content 123456789') + + with open(file_c, 'rb') as file_bin_input: + await storage_client.upload(_TESTING_CFG.project_id, _TESTING_CFG.bucket_name, 'file_bin_input', file_bin_input) + assert await storage_client.download(_TESTING_CFG.project_id, _TESTING_CFG.bucket_name, 'file_bin_input') == content_bin + + with open(file_c, 'r') as file_txt_input: + await 
storage_client.upload(_TESTING_CFG.project_id, _TESTING_CFG.bucket_name, 'file_txt_input', file_txt_input) + assert await storage_client.download(_TESTING_CFG.project_id, _TESTING_CFG.bucket_name, 'file_txt_input') == content_bin + + await storage_client.upload(_TESTING_CFG.project_id, _TESTING_CFG.bucket_name, 'str_input', content_str) + assert await storage_client.download(_TESTING_CFG.project_id, _TESTING_CFG.bucket_name, 'str_input') == content_bin + + await storage_client.upload(_TESTING_CFG.project_id, _TESTING_CFG.bucket_name, 'bin_input', content_bin) + assert await storage_client.download(_TESTING_CFG.project_id, _TESTING_CFG.bucket_name, 'bin_input') == content_bin + + await storage_client.upload(_TESTING_CFG.project_id, _TESTING_CFG.bucket_name, 'empty_input', None) + actual_data = await storage_client.download(_TESTING_CFG.project_id, _TESTING_CFG.bucket_name, 'empty_input') + assert len(actual_data) == 0 + + with pytest.raises(TypeError): + await storage_client.upload(_TESTING_CFG.project_id, _TESTING_CFG.bucket_name, 'int_input', 123456) + + + + + + + diff --git a/tests/test_utils.py b/tests/test_utils.py new file mode 100644 index 0000000..cde52ba --- /dev/null +++ b/tests/test_utils.py @@ -0,0 +1,5 @@ +def from_env(key, default=None): + import os + result = os.environ.get(key, default) + # assert result, "Failed to get {} env variable".format(key) + return result -- GitLab From 5a5eb592b4331ba24bae7d2d24350e66c1d1079e Mon Sep 17 00:00:00 2001 From: Ethiraj Krishnamanaidu Date: Tue, 1 Sep 2020 18:25:24 -0500 Subject: [PATCH 3/5] Apache Headers --- osdu_gcp/__init__.py | 14 ++++++++++++++ osdu_gcp/storage/__init__.py | 14 ++++++++++++++ osdu_gcp/storage/auth_gcp_sa.py | 14 ++++++++++++++ osdu_gcp/storage/blob_storage_gcp.py | 14 ++++++++++++++ osdu_gcp/storage/http_client_gcp.py | 14 ++++++++++++++ setup.py | 14 ++++++++++++++ tests/__init__.py | 14 ++++++++++++++ tests/conftest.py | 14 ++++++++++++++ tests/storage/__init__.py | 14 ++++++++++++++ 
tests/storage/test_blob_storage_gcp.py | 14 ++++++++++++++ tests/test_utils.py | 14 ++++++++++++++ 11 files changed, 154 insertions(+) diff --git a/osdu_gcp/__init__.py b/osdu_gcp/__init__.py index 25927d0..c405396 100644 --- a/osdu_gcp/__init__.py +++ b/osdu_gcp/__init__.py @@ -1,3 +1,17 @@ +# Copyright 2020 Schlumberger +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + __version__ = '0.2.4' # MUSE NOT BE CHANGED MANUALLY diff --git a/osdu_gcp/storage/__init__.py b/osdu_gcp/storage/__init__.py index e69de29..e4655d7 100644 --- a/osdu_gcp/storage/__init__.py +++ b/osdu_gcp/storage/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2020 Schlumberger +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ diff --git a/osdu_gcp/storage/auth_gcp_sa.py b/osdu_gcp/storage/auth_gcp_sa.py index 4345e4a..1c3898d 100644 --- a/osdu_gcp/storage/auth_gcp_sa.py +++ b/osdu_gcp/storage/auth_gcp_sa.py @@ -1,3 +1,17 @@ +# Copyright 2020 Schlumberger +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import aiohttp import json from typing import List, Optional diff --git a/osdu_gcp/storage/blob_storage_gcp.py b/osdu_gcp/storage/blob_storage_gcp.py index a82afc6..855b91c 100644 --- a/osdu_gcp/storage/blob_storage_gcp.py +++ b/osdu_gcp/storage/blob_storage_gcp.py @@ -1,3 +1,17 @@ +# Copyright 2020 Schlumberger +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ # Based on https://github.com/talkiq/gcloud-aio/blob/master/storage/gcloud/aio/storage/storage.py import enum diff --git a/osdu_gcp/storage/http_client_gcp.py b/osdu_gcp/storage/http_client_gcp.py index 087d591..fada30c 100644 --- a/osdu_gcp/storage/http_client_gcp.py +++ b/osdu_gcp/storage/http_client_gcp.py @@ -1,3 +1,17 @@ +# Copyright 2020 Schlumberger +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from typing import Optional, List, Any, Union, Tuple from multidict import CIMultiDictProxy import enum diff --git a/setup.py b/setup.py index e8d3317..2b959a4 100644 --- a/setup.py +++ b/setup.py @@ -1,3 +1,17 @@ +# Copyright 2020 Schlumberger +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ import os import re from setuptools import setup, find_packages diff --git a/tests/__init__.py b/tests/__init__.py index e69de29..e4655d7 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2020 Schlumberger +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + diff --git a/tests/conftest.py b/tests/conftest.py index 241caac..6983364 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,3 +1,17 @@ +# Copyright 2020 Schlumberger +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from .test_utils import from_env diff --git a/tests/storage/__init__.py b/tests/storage/__init__.py index e69de29..e4655d7 100644 --- a/tests/storage/__init__.py +++ b/tests/storage/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2020 Schlumberger +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + diff --git a/tests/storage/test_blob_storage_gcp.py b/tests/storage/test_blob_storage_gcp.py index f2c0ffe..589d92b 100644 --- a/tests/storage/test_blob_storage_gcp.py +++ b/tests/storage/test_blob_storage_gcp.py @@ -1,3 +1,17 @@ +# Copyright 2020 Schlumberger +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from tests.conftest import * import tempfile import shutil diff --git a/tests/test_utils.py b/tests/test_utils.py index cde52ba..751abf0 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -1,3 +1,17 @@ +# Copyright 2020 Schlumberger +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + def from_env(key, default=None): import os result = os.environ.get(key, default) -- GitLab From beb54fd29a8beb648e10896b1db6b0b51c00074a Mon Sep 17 00:00:00 2001 From: "DIR\\FSerin" Date: Thu, 1 Oct 2020 11:03:13 +0200 Subject: [PATCH 4/5] Add pipeline and skip tests if GCP service account not present --- .gitlab-ci.yml | 38 ++++++++++++++++++++++++++ requirements.txt | 2 +- requirements_opengroup.txt | 1 + tests/storage/test_blob_storage_gcp.py | 9 ++++++ 4 files changed, 49 insertions(+), 1 deletion(-) create mode 100644 .gitlab-ci.yml create mode 100644 requirements_opengroup.txt diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml new file mode 100644 index 0000000..f3a7ec2 --- /dev/null +++ b/.gitlab-ci.yml @@ -0,0 +1,38 @@ +default: + image: python:3.7-slim-buster + +stages: + - test + - deploy + +build: + stage: test + script: + - echo ---- ---- ---- SYSTEM DEPENDENCIES ---- ---- ---- + - apt update + - apt install -y --no-install-recommends git + - echo ---- ---- ---- BUILD IMAGE ---- ---- ---- + - pip3 install -r requirements.txt + - pip3 install -r requirements_opengroup.txt + - pip3 install -r requirements_dev.txt + - pytest tests --junitxml=report.xml + artifacts: + when: always + reports: + junit: report.xml + +# This job only runs on master, and it creates the lib and push it to the feed +deploylib: + stage: deploy + script: + - echo ---- ---- ---- SYSTEM DEPENDENCIES ---- ---- ---- + - apt update + - apt install -y --no-install-recommends git + - echo ---- ---- ---- BUILD IMAGE ---- ---- ---- + - pip3 install -r requirements.txt + - pip3 install -r requirements_opengroup.txt + - pip3 install twine + - python3 setup.py sdist bdist_wheel + - TWINE_PASSWORD=${CI_JOB_TOKEN} TWINE_USERNAME=${CI_REGISTRY_USER} python -m twine upload --repository-url ${CI_API_V4_URL}/projects/${CI_PROJECT_ID}/packages/pypi dist/* + rules: + - if: $CI_COMMIT_BRANCH == 'master' 
diff --git a/requirements.txt b/requirements.txt index 7c27627..cc59c70 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ # osdu core lib main python -osdu-core-lib-python>=0.3.0, <0.4 +#osdu-core-lib-python>=0.3.0, <0.4 # for google provider aiohttp diff --git a/requirements_opengroup.txt b/requirements_opengroup.txt new file mode 100644 index 0000000..656747c --- /dev/null +++ b/requirements_opengroup.txt @@ -0,0 +1 @@ +git+https://community.opengroup.org/osdu/platform/domain-data-mgmt-services/wellbore/lib/wellbore-core/wellbore-core-lib.git@slb-code-push#egg=osdu-core-lib-python diff --git a/tests/storage/test_blob_storage_gcp.py b/tests/storage/test_blob_storage_gcp.py index 589d92b..d8b420e 100644 --- a/tests/storage/test_blob_storage_gcp.py +++ b/tests/storage/test_blob_storage_gcp.py @@ -19,6 +19,7 @@ from osdu_gcp.storage.blob_storage_gcp import GCloudAioStorage import pytest import aiohttp import uuid +import os class _TESTING_CFG: @@ -54,7 +55,12 @@ async def temp_directory() -> str: # teardown - recursively delete the tmp directory shutil.rmtree(tmpdir, ignore_errors=True) +service_account_required = pytest.mark.skipif( + not os.path.isfile(TESTING_GCP_DATA_PROJECT_CREDENTIALS), reason="requires service account file" +) + @pytest.mark.asyncio +@service_account_required async def test_list_objects(storage_client): result = await storage_client.list_objects(_TESTING_CFG.project_id, _TESTING_CFG.bucket_name) for file_name, _ in TEST_DATA['initial_files']: @@ -62,6 +68,7 @@ async def test_list_objects(storage_client): @pytest.mark.asyncio +@service_account_required async def test_download(storage_client): name, expected_content = TEST_DATA['initial_files'][0] data = await storage_client.download(_TESTING_CFG.project_id, _TESTING_CFG.bucket_name, name) @@ -70,6 +77,7 @@ async def test_download(storage_client): @pytest.mark.asyncio +@service_account_required async def test_upload_check_delete(storage_client): name = str(uuid.uuid4()) 
content = str(uuid.uuid4()) @@ -94,6 +102,7 @@ async def test_upload_check_delete(storage_client): @pytest.mark.asyncio +@service_account_required async def test_upload_from_various_type(storage_client, temp_directory): file_c = temp_directory + '\\testing.file' content_bin = b'expected content 123456789' -- GitLab From ec9053e10a28faae5300ca047ef8e13449effa6a Mon Sep 17 00:00:00 2001 From: Luc Yriarte Date: Thu, 26 Nov 2020 15:12:56 +0100 Subject: [PATCH 5/5] Remove redundant license --- LICENSE.TXT | 177 ---------------------------------------------------- 1 file changed, 177 deletions(-) delete mode 100644 LICENSE.TXT diff --git a/LICENSE.TXT b/LICENSE.TXT deleted file mode 100644 index 4947287..0000000 --- a/LICENSE.TXT +++ /dev/null @@ -1,177 +0,0 @@ - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. 
- - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." 
- - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. 
You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. 
- - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. 
In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS \ No newline at end of file -- GitLab