diff --git a/src/dags/libs/upload_file.py b/src/dags/libs/upload_file.py
index 73e260d2398b647465c95fbb81f831a835547a40..4014e9722b1299f397af786f6ec61397b5ced2e1 100644
--- a/src/dags/libs/upload_file.py
+++ b/src/dags/libs/upload_file.py
@@ -136,10 +136,16 @@ class GCSFileUploader(FileUploader):
         raise GCSObjectURIError
 
     @tenacity.retry(**RETRY_SETTINGS)
-    def get_file_from_bucket(self, bucket_name: str, source_blob_name: str) -> io.BytesIO:
+    def get_file_from_bucket(
+        self,
+        bucket_name: str,
+        source_blob_name: str
+    ) -> Tuple[io.BytesIO, str]:
         storage_client = storage.Client()
         bucket = storage_client.bucket(bucket_name)
-        blob = bucket.blob(source_blob_name)
+        # get_blob() loads the object's metadata (content_type is read below), but it
+        # returns None when the object is missing, so guard before calling exists().
+        blob = bucket.get_blob(source_blob_name)
 
-        does_exist = blob.exists()
+        does_exist = blob is not None and blob.exists()
         if not does_exist:
@@ -149,9 +155,27 @@ class GCSFileUploader(FileUploader):
         file = io.BytesIO()
         blob.download_to_file(file)
         logger.debug("File got from landing zone")
-        return file
+        return file, blob.content_type
 
-    def get_file_from_preload_path(self, preload_file_path: str) -> io.BytesIO:
+    def get_file_from_preload_path(self, preload_file_path: str) -> Tuple[io.BytesIO, str]:
         bucket_name, blob_name = self._parse_object_uri(preload_file_path)
-        buffer = self.get_file_from_bucket(bucket_name, blob_name)
-        return buffer
+        buffer, content_type = self.get_file_from_bucket(bucket_name, blob_name)
+        return buffer, content_type
+
+    def upload_file(self, preload_file_path: str) -> str:
+        """
+        Copy file from Landing zone (preload_file_path) onto OSDU platform using File service.
+        Get Content-Type of this file, refresh Content-Type with this value in headers
+        while this file is being uploaded onto OSDU platform.
+        Return file_location.
+        """
+        buffer, content_type = self.get_file_from_preload_path(preload_file_path)
+        file_id, signed_url = self._get_signed_url_request(self.request_headers)
+
+        # Build a copy so the file's Content-Type is sent only with the upload request
+        # and is never written into the object returned by self.request_headers.
+        headers = {**self.request_headers, "Content-Type": content_type}
+        self._upload_file_request(headers, signed_url, buffer)
+
+        file_location = self._get_file_location_request(self.request_headers, file_id)
+        return file_location
diff --git a/src/dags/libs/validate_schema.py b/src/dags/libs/validate_schema.py
index 9506e1281253828a1398c7feeac762f2e1d23200..4ae82b47f446443fdae768d886bee6b756cb9844 100644
--- a/src/dags/libs/validate_schema.py
+++ b/src/dags/libs/validate_schema.py
@@ -111,6 +111,10 @@ class SchemaValidator(HeadersMixin):
         """
         if not schema:
             schema = self.get_schema(manifest["kind"])
+        # Drop the "pattern" restriction on the "id" property before validating;
+        # a schema without top-level "properties" is left untouched.
+        if schema.get("properties", {}).get("id"):
+            schema["properties"]["id"].pop("pattern", None)
         logger.debug(f"Validating kind {manifest['kind']}")
         resolver = OSDURefResolver(schema_service=self.schema_service,
                                    base_uri=schema.get("$id", ""), referrer=schema,
diff --git a/tests/plugin-unit-tests/test_file_uplaod.py b/tests/plugin-unit-tests/test_file_upload.py
similarity index 97%
rename from tests/plugin-unit-tests/test_file_uplaod.py
rename to tests/plugin-unit-tests/test_file_upload.py
index 9042b30cf5f7138c702c30498676067a379058f2..246d768ccf1d7f0f38182629e40fda8a3dc5b8c4 100644
--- a/tests/plugin-unit-tests/test_file_uplaod.py
+++ b/tests/plugin-unit-tests/test_file_upload.py
@@ -48,7 +48,8 @@ class TestSourceFileChecker:
             file_uploader: GCSFileUploader
     ):
         file = io.RawIOBase()
-        monkeypatch.setattr(file_uploader, "get_file_from_bucket", lambda *args, **kwargs: file)
+        monkeypatch.setattr(file_uploader, "get_file_from_bucket",
+                            lambda *args, **kwargs: (file, "test"))
         file_uploader.upload_file("gs://test/test")
 
     @pytest.mark.parametrize(