Commit 593dea6a authored by Spencer Sutton's avatar Spencer Sutton
Browse files

Merge

parents d98bc826 88ea1878
.vscode/*
.idea/*
# energistics/src/witsml_parser/osdu/*
energistics/src/witsml_parser/osdu/*
!energistics/src/witsml_parser/osdu/.gitkeep
......@@ -13,26 +13,127 @@
# See the License for the specific language governing permissions and
# limitations under the License.
image: google/cloud-sdk:alpine
image: docker:19.03
variables:
OSDU_AIRFLOW_IMAGE: eu.gcr.io/osdu-cicd-epam/airflow-python-dags/airflow-python-dags:latest
OSDU_AIRFLOW_IMAGE: johnybear/osdu-airflow:python36-1
OSDU_GCP_VENDOR: gcp
OSDU_GCP_APPLICATION_NAME: energistics
WORKDIR: energistics
SRCDIR: energistics/src
SRCDIR: energistics/src/
TESTDIR: energistics/tests
VENDOR_NAME: energistics
BASE_IMAGE_TAG: $CI_REGISTRY_IMAGE/witsml-parser:$CI_COMMIT_SHA
OSDU_GCP_IMAGE_TAG: $CI_REGISTRY_IMAGE/osdu-gcp-witsml-parser:$CI_COMMIT_SHA
stages:
- test
- containerize
- csp_containerize
- deploy
dags_rsync:
# Build the base WITSML parser image, run the containerised test suite
# against it, and push the result to the project registry. Manual job.
build_base_image:
  stage: containerize
  tags: ["osdu-medium"]
  script:
    - echo ---- ---- ---- SYSTEM DEPENDENCIES ---- ---- ----
    - apk update
    - apk add git
    - echo ---- ---- ---- CLONE REPOSITORIES ---- ---- ----
    # Fetch the OSDU Python SDK and ingestion DAGs that build/Dockerfile
    # copies into the image.
    - >
      cd energistics/src/witsml_parser/osdu
      && git clone https://community.opengroup.org/osdu/platform/system/sdks/common-python-sdk.git
      && git clone https://community.opengroup.org/osdu/platform/data-flow/ingestion/ingestion-dags.git
      && cd - && ls
    - echo ---- ---- ---- BUILD IMAGE ---- ---- ----
    - docker build -t $BASE_IMAGE_TAG --rm -f ./build/Dockerfile .
    - docker build -t witsml:test --rm -f ./build/test/Dockerfile --build-arg base_image=$BASE_IMAGE_TAG .
    - echo ---- ---- ---- TESTS ---- ---- ----
    # Fail the job if the test container exits non-zero.
    - docker run witsml:test || exit $?
    - echo ---- ---- ---- PUSH IMAGE ---- ---- ----
    - docker login -u $CI_REGISTRY_USER -p $CI_REGISTRY_PASSWORD $CI_REGISTRY
    - docker push $BASE_IMAGE_TAG
  when: manual
# Build and push the GCP provider image, then bake its tag into the
# KubernetesPodOperator config that the deploy jobs upload. Manual job.
build_osdu_gcp_image:
  stage: csp_containerize
  tags: ["osdu-medium"]
  needs: ["build_base_image"]
  script:
    - echo ---- ---- ---- SYSTEM DEPENDENCIES ---- ---- ----
    - apk update
    - apk add git
    - echo ---- ---- ---- BUILD IMAGE ---- ---- ----
    - docker build -t $OSDU_GCP_IMAGE_TAG --rm -f ./build/providers/$OSDU_GCP_VENDOR/Dockerfile.energistics --build-arg PROVIDER_NAME=$OSDU_GCP_VENDOR --build-arg base_image=$BASE_IMAGE_TAG .
    - docker login -u $CI_REGISTRY_USER -p $CI_REGISTRY_PASSWORD $CI_REGISTRY
    - echo ---- ---- PUSH IMAGE
    - docker push $OSDU_GCP_IMAGE_TAG
    # Replace the IMAGE placeholder so the artifact references this build.
    - sed -i "s|IMAGE|$OSDU_GCP_IMAGE_TAG|" devops/osdu-gcp/airflow_configs.yaml
  when: manual
  artifacts:
    when: always
    paths:
      - devops/osdu-gcp/airflow_configs.yaml
    expire_in: 1 week
  except:
    - /^release\/*/
# Build and push the GCP provider image for release refs. The image is
# always tagged release-latest; when the pipeline was triggered by a git
# tag, it is additionally pushed under that tag.
build_osdu_gcp_release_image:
  stage: csp_containerize
  tags: ["osdu-medium"]
  needs: ["build_base_image"]
  variables:
    # Override: release images use a stable name; tags are applied below.
    OSDU_GCP_IMAGE_TAG: $CI_REGISTRY_IMAGE/osdu-gcp-witsml-parser
  script:
    - echo ---- ---- ---- SYSTEM DEPENDENCIES ---- ---- ----
    - apk update
    - apk add git
    - echo ---- ---- ---- BUILD IMAGE ---- ---- ----
    - docker build -t $OSDU_GCP_IMAGE_TAG:release-latest --rm -f ./build/providers/$OSDU_GCP_VENDOR/Dockerfile.energistics --build-arg PROVIDER_NAME=$OSDU_GCP_VENDOR --build-arg base_image=$BASE_IMAGE_TAG .
    - docker login -u $CI_REGISTRY_USER -p $CI_REGISTRY_PASSWORD $CI_REGISTRY
    - echo ---- ---- PUSH IMAGE
    - docker push $OSDU_GCP_IMAGE_TAG:release-latest
    # FIX: the original ran bare 'push', which is not a command — the
    # tagged push always failed with "push: not found". Must be 'docker push'.
    - if [ -n "$CI_COMMIT_TAG" ]; then docker tag $OSDU_GCP_IMAGE_TAG:release-latest $OSDU_GCP_IMAGE_TAG:$CI_COMMIT_TAG && docker push $OSDU_GCP_IMAGE_TAG:$CI_COMMIT_TAG; fi
  when: manual
  only:
    # NOTE(review): '*' quantifies the slash, so this also matches refs like
    # "release-foo"; /^release\/.*/ may be the intent — confirm before changing.
    - /^release\/*/
# Deploy DAGs, plugins and the operator config to the OSDU GCP Airflow
# bucket. Manual job, restricted to protected refs.
osdu_gcp_dags_rsync:
image: gcr.io/google.com/cloudsdktool/cloud-sdk
stage: deploy
needs: [build_osdu_gcp_image]
script:
- gcloud auth activate-service-account --key-file $OSDU_GCP_DEPLOY_FILE
# NOTE(review): the cd/rsync lines here and the pushd/rsync/popd block
# below both sync dags/ for the vendor — this looks like duplication left
# over from a merge; confirm which variant should remain.
- cd "$SRCDIR"
- gsutil -m rsync -d -R dags/"$VENDOR_NAME"/ $OSDU_GCP_DEPL_TARGET/dags/"$VENDOR_NAME"
- gsutil -m rsync -R plugins/"$VENDOR_NAME"/ $OSDU_GCP_DEPL_TARGET/plugins/"$VENDOR_NAME"
- gcloud config set project "$OSDU_GCP_PROJECT"
# Inject the service-account key path into the operator config.
- sed -i "s|KEY_PATH|$OSDU_GCP_KEY_PATH|" devops/osdu-gcp/airflow_configs.yaml
- pushd "$SRCDIR"
- ls
- echo *******
- gsutil -m rsync -d -R dags/"$VENDOR_NAME"/ "$OSDU_GCP_DEPL_TARGET"/dags/"$VENDOR_NAME"
- popd
- gsutil cp devops/osdu-gcp/airflow_configs.yaml "$OSDU_GCP_DEPL_TARGET"/dags/configs/
only:
variables:
- $CI_COMMIT_REF_PROTECTED
when: manual
# Deploy DAGs and the operator config to the pre-prod/prod Composer
# bucket. Manual job, release refs only.
osdu_gcp_dags_prod_rsync:
  image: gcr.io/google.com/cloudsdktool/cloud-sdk
  stage: deploy
  needs: [build_osdu_gcp_image]
  script:
    - gcloud auth activate-service-account --key-file $OSDU_GCP_PROD_SVC_ACCT
    - gcloud config set project "$OSDU_GCP_PROD_PROJECT"
    # Inject the prod service-account key path into the operator config.
    - sed -i "s|KEY_PATH|$OSDU_GCP_PROD_KEY_PATH|" devops/osdu-gcp/airflow_configs.yaml
    - pushd "$SRCDIR"
    - ls
    - echo *******
    - gsutil -m rsync -d -R dags/"$VENDOR_NAME"/ "$OSDU_GCP_DEPL_TARGET_PREPROD"/dags/"$VENDOR_NAME"
    - popd
    - gsutil cp devops/osdu-gcp/airflow_configs.yaml "$OSDU_GCP_DEPL_TARGET_PREPROD"/dags/configs/
  only:
    - /^release\/*/
  when: manual
......@@ -29,11 +29,9 @@ ENV OSDU_CORE_PROVIDERS_DIR="${WITSML_PARSER_DIR}/osdu/ingestion-dags/src/dags/p
COPY ./${BUILD_DIR}/requirements.txt ./
COPY ./${WITSML_PARSER_DIR}/*.py ./
COPY ./${ENERGISTICS_LIBS} ./energistics
COPY build/osdu_api-0.0.5.tar.gz ./
# TODO: Configure Package Registry for Python SDK
RUN pip install --upgrade pip && \
pip install osdu_api-0.0.5.tar.gz && \
pip install -r "./energistics/requirements.txt" && \
pip install -r "./requirements.txt"
......
# Copyright 2021 Google LLC
# Copyright 2021 EPAM
#
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.​
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
ARG docker_builder_image
FROM ${docker_builder_image}
......
# Copyright © 2020 Amazon Web Services
#
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.​
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
......
# Copyright © 2020 Amazon Web Services
#
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.​
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
......
# Copyright © 2020 Amazon Web Services
#
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.​
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
......@@ -21,7 +20,7 @@ data_workflow_url=%(API_BASE_URL)s/api/data-workflow/v1
file_dms_url=%(API_BASE_URL)s/api/filedms/v2
dataset_url=%(API_BASE_URL)s/api/dataset/v1
entitlements_url=%(API_BASE_URL)s/api/entitlements/v1
schema_url=blah
schema_url=%(API_BASE_URL)s/api/schema-service/v1
use_service_principal=True
[provider]
......
# Copyright 2021 Google LLC
# Copyright 2021 EPAM
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Provider-specific layer for the WITSML parser: starts from the base
# parser image and installs the provider's extra Python requirements.
ARG base_image
FROM ${base_image}
ENV HOME="/home/witsml_parser"
WORKDIR $HOME
# Cloud provider name (e.g. "gcp"), exposed to the parser at runtime.
ARG PROVIDER_NAME
ENV CLOUD_PROVIDER $PROVIDER_NAME
COPY "build/providers/${PROVIDER_NAME}/requirements.txt" ./
RUN pip install -r "requirements.txt"
# Copyright 2021 Google LLC
# Copyright 2017-2019, Schlumberger
# Copyright 2021 EPAM
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Cloud Build pipeline: build the builder image first, then the
# provider-specific parser image derived from it. Both are pushed via
# the top-level 'images' list.
steps:
  - id: base
    name: 'gcr.io/cloud-builders/docker'
    args: [
      'build',
      '--build-arg', 'PROVIDER_NAME=${_PROVIDER_NAME}',
      '-t', 'gcr.io/$PROJECT_ID/${_APPLICATION_NAME}/${_GCP_SERVICE}-${_PROVIDER_NAME}-builder:witsml',
      '-f', 'build/Dockerfile',
      '.'
    ]
    waitFor: ['-']
  # NOTE(review): this step passes build-arg 'builder_image', while
  # Dockerfile.energistics declares 'ARG base_image' — confirm the arg name
  # actually matches the Dockerfile this step builds.
  - name: 'gcr.io/cloud-builders/docker'
    args: [
      'build',
      '--build-arg', 'PROVIDER_NAME=${_PROVIDER_NAME}',
      '--build-arg', 'builder_image=gcr.io/$PROJECT_ID/${_APPLICATION_NAME}/${_GCP_SERVICE}-${_PROVIDER_NAME}-builder:witsml',
      '-t', 'gcr.io/$PROJECT_ID/${_APPLICATION_NAME}/${_GCP_SERVICE}-${_PROVIDER_NAME}:${_SHORT_SHA}',
      '-t', 'gcr.io/$PROJECT_ID/${_APPLICATION_NAME}/${_GCP_SERVICE}-${_PROVIDER_NAME}:latest',
      '-f', 'build/providers/${_PROVIDER_NAME}/Dockerfile.energistics',
      '.'
    ]
    waitFor: ["base"]
images:
  - 'gcr.io/$PROJECT_ID/${_APPLICATION_NAME}/${_GCP_SERVICE}-${_PROVIDER_NAME}-builder'
  - 'gcr.io/$PROJECT_ID/${_APPLICATION_NAME}/${_GCP_SERVICE}-${_PROVIDER_NAME}'
\ No newline at end of file
# Copyright 2021 Google LLC
# Copyright 2021 EPAM
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# IBM provider image for the WITSML parser: copies the parser sources,
# energistics libraries and the IBM osdu_api.ini into a python:3.6 base
# and installs the Python requirements.
FROM python:3.6-slim
ENV HOME="/home/witsml_parser"
WORKDIR $HOME
ENV BUILD_DIR="build"
ENV WITSML_PARSER_DIR="energistics/src/witsml_parser"
ENV ENERGISTICS_LIBS="${WITSML_PARSER_DIR}/energistics"
# Paths to the OSDU ingestion-dags sources cloned into osdu/ at build time.
ENV OSDU_CORE_LIBS_DIR="${WITSML_PARSER_DIR}/osdu/ingestion-dags/src/dags/libs"
ENV OSDU_CORE_PROVIDERS_DIR="${WITSML_PARSER_DIR}/osdu/ingestion-dags/src/dags/providers"
COPY ./${BUILD_DIR}/providers/ibm/requirements.txt ./
COPY ./${WITSML_PARSER_DIR}/*.py ./
COPY ./${ENERGISTICS_LIBS} ./energistics
COPY ./${BUILD_DIR}/providers/ibm/osdu_api.ini ./
# TODO: Configure Package Registry for Python SDK
RUN pip install --upgrade pip && \
    pip install -r "./energistics/requirements.txt" && \
    pip install -r "./requirements.txt"
# Make the parser sources importable from anywhere in the container.
ENV PYTHONPATH="$PYTHONPATH:$HOME"
[environment]
data_partition_id=osdu
storage_url=%(API_BASE_URL)s/osdu-storage/api/storage/v2
search_url=%(API_BASE_URL)s/osdu-search/api/search/v2
legal_url=%(API_BASE_URL)s/osdu-legal/api/legal/v1
data_workflow_url=%(API_BASE_URL)s/osdu-workflow/api/workflow/v1
file_dms_url=%(API_BASE_URL)s/osdu-filedms/api/dms/file/v1
dataset_url=%(API_BASE_URL)s/osdu-dataset/api/dataset/v1
entitlements_url=%(API_BASE_URL)s/osdu-entitlements/api/entitlements/v1
schema_url=%(API_BASE_URL)s/osdu-schema/api/schema-service/v1
use_service_principal=True
file_service_host=%(API_BASE_URL)s/osdu-file/api/file
boto3
twine==3.2.0
pyyaml==5.4.1
requests==2.25.1
tenacity
python-keycloak==0.24.0
dataclasses==0.8;python_version<"3.7"
--extra-index-url https://community.opengroup.org/api/v4/projects/148/packages/pypi/simple
# master
osdu_api==0.0.5.dev0+0b1913fd
# ibm-cloud-storage
# osdu_api==0.0.5.dev0+0de883e8
twine==3.2.0
pyyaml==5.4.1
requests==2.25.1
dataclasses==0.8
tenacity
dataclasses==0.8;python_version<"3.7"
--extra-index-url https://community.opengroup.org/api/v4/projects/148/packages/pypi/simple
osdu_api==0.0.5.dev0+0b49bb3e
......@@ -13,13 +13,14 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# Test image: layers the docker_test suite on top of the base parser image
# (passed as build-arg) and runs pytest as the container command.
# NOTE(review): the unparameterised FROM below appears to be leftover from
# a merge — the ARG/FROM pair that follows supersedes it as the final
# stage; confirm and remove the stale line.
FROM witsml_parser:latest
ARG base_image
FROM ${base_image}
ENV HOME="/home/witsml_parser"
WORKDIR $HOME
ENV CLOUD_PROVIDER="gcp"
COPY "build/providers/${CLOUD_PROVIDER}/requirements.txt" ./
COPY "build/test/requirements.txt" ./
COPY "energistics/src/docker_test" ./test
RUN pip install -r "requirements.txt"
CMD pytest test
# KubernetesPodOperator configuration for the energistics WITSML parser.
# "IMAGE" and "KEY_PATH" are placeholders replaced via sed by the CI jobs
# before this file is copied to the Airflow bucket.
energistics:
  energistics_witsml_parser_k8s_operator:
    image: "IMAGE"
    # Pin parser pods to the dedicated GKE node pool.
    affinity:
      nodeAffinity:
        requiredDuringSchedulingIgnoredDuringExecution:
          nodeSelectorTerms:
            - matchExpressions:
                - key: cloud.google.com/gke-nodepool
                  operator: In
                  values:
                    - pool-1
    env_vars:
      SA_FILE_PATH: "KEY_PATH"
    resources:
      request_memory: "1Gi"
      request_cpu: 200m
      limit_memory: 1Gi
      limit_cpu: 1000m
......@@ -70,17 +70,18 @@ A factory mechanism is used to associate the MIME type of the file (or file part
It is the responsibility of each individual parser, to make the translation from Energistics classes to OSDU classes.
# The WITSML Parsers for OSDU Ingestion DAG
## Getting Started
Before start, add `$WITSML_IMAGE` path to Environment. It will be used by KubernetesPodOperator later.
In general, workflow looks like:
In general, the workflow looks like:
1. Upload a file to any storage accessible for Airflow
1. Call the Workflow service passing the path to the file (see the example below)
1. The DAG is triggered by the Ingestion/Workflow Service. This DAG gets the file, parses it, creates a Manifest, validates it, and writes it into the Storage Service
1. Upload a file to any storage accessible for Airflow
1. Call Workflow service passing the path to the file (see the example below)
1. The DAG is triggered by Ingestion/Workflow Service. This DAG gets the file, parses it, creates a Manifest, validates it, and writes it into the Storage Service
Example of submitting ingestion using Workflow service:
......@@ -119,8 +120,7 @@ curl --location --request POST 'https://<workflow-url>/v1/workflow/Energistics_x
"version": 5
}
}
}
}'
```
### Custom Libs
......@@ -129,16 +129,10 @@ The best working approach is to place custom libs into the `witsml_parser/energi
```
project
├──energistics/
├──energistics/src/
| ├──dags/
| | └── energistics/
| | └──witsml_parser_dag.py
| └──plugins/
| | └──energistics/
| | ├──operators/
| | | └── ...
| | └──hooks/
| | └── ...
| └──witsml_parser/
| └──energistics/
|
......@@ -157,9 +151,6 @@ Build the base image:
# clone OSDU source code into energistics/src/witsml_parser/osdu
cd energistics/src/witsml_parser/osdu
git clone git@community.opengroup.org:osdu/platform/system/sdks/common-python-sdk.git
git clone git@community.opengroup.org:osdu/platform/data-flow/ingestion/ingestion-dags.git
# return to root dir
cd -
......@@ -168,6 +159,15 @@ docker build -t witsml_parser -f build/Dockerfile .
```
To test the base image.
```shell
# build a test image
docker build -t test-parser -f build/test/Dockerfile .
docker run --rm -i test-parser
```
Then, if it is needed, build a provider-specific image (example of Dockerfile is in `build/providers/gcp`).
You must place your Dockerfile and requirements.txt into `build/providers/<your-provider>`.
```shell
......@@ -192,44 +192,6 @@ WITSML Parser is executed in KubernetesPodOperator, that requires certain config
These configurations may differ depending on a provider. The current approach is to store the configurations in the `.yaml`
file. The path to the file must be specified in environmental variable `$OSDU_API_CONFIG`. The following configs must be added to the file:
```yaml
energistics: # vendor name
energistics_witsml_parser_k8s_operator: # k8s configs for WITSML parser
image: <path to a image> # required
affinity: # not required; defaults to an image with no placement constraints
# it can be your own settings here
nodeAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
nodeSelectorTerms:
- matchExpressions:
- key: string
operator: In
values:
- string
env_vars: # not required
# Specify provider-specific vars
DUMMY: dummy_value
resources: # not required
# Specify your own resources
request_memory: string
request_cpu: string
limit_memory: string
limit_cpu: string
... # other params
```
Where:
* `$CONTEXT` is a value of the `execution_context` field
* `$FILE_URL` is a path to the File service
* `$OUT` is a path to the file, where a manifest will be saved (to pass the result in XCom use `/airflow/xcom/return.json`).
Push your image to any registry and add the `$WITSML_IMAGE` variable to use it in KubernetesPodOperator.
### Setup KubernetesPodOperator
WITSML Parser is executed in KubernetesPodOperator, which requires certain configurations.
These configurations may differ depending on a provider. The current approach is to store the configurations in the `.yaml`
file. The path to the file must be specified in environmental variable `$OSDU_API_CONFIG`. The following configs must be added to the file:
```yaml
energistics: # vendor name
energistics_witsml_parser_k8s_operator: # k8s configs for WITSML parser
......@@ -281,31 +243,6 @@ DAG's tasks:
1. *process_single_manifest_file_task* prepares every entity to be stored on the Storage service and stores it.
1. *update_status_finished_task* requires *workflowID* to change a workflow state from *running* to either *finished* or *failed*. If any previous task is failed, the DAG is marked as failed and the status *failed* is sent to the Workflow Service, otherwise the DAG is marked as successful and the status *finished* is sent to the Workflow Service.
Where:
* `energistics_witsml_parser_k8s_operator` is configurations for WITSML Parser's `KubernetesPodOperator`
* `image` (required) is a path to the image
Besides `image` you can specify other params (as it is in the example), and they will be passed to `KubernetesPodOperator`
as `**kwargs`.
The list of possible parameters is here:
https://github.com/apache/airflow/blob/master/airflow/providers/cncf/kubernetes/operators/kubernetes_pod.py#L178
### Airflow DAG
The tasks sequence looks like:
*update_status_running_task* -> *witsml_parser_task* -> *validate_schema_task* -> *ensure_integrity_task* -> *process_single_manifest_file_task* -> *update_status_finished_task*
DAG's tasks:
1. *update_status_running_task* requires a *workflowID* to change a workflow state from *submitted* to *running*
1. *witsml_parser_task* requires a *PreloadFilePath* value. This task is running using KubernetesPodOperator. It saves a file on OSDU Platform, gets the file's content and parses it, returns the parsed Manifest in XCom.
1. *validate_schema_task* validates every record against schemas in the manifest from the previous task.
1. *ensure_integrity_task* ensures data integrity by checking whether referent entities are in the Manifest or on OSDU Platform.
1. *process_single_manifest_file_task* prepares every entity to be stored on the Storage service and stores it.
1. *update_status_finished_task* requires *workflowID* to change a workflow state from *running* to either *finished* or *failed*. If any previous task is failed, the DAG is marked as failed and the status *failed* is sent to the Workflow Service, otherwise the DAG is marked as successful and the status *finished* is sent to the Workflow Service.
## Prerequisites
WITSML Parser requires the following packages:
......@@ -321,39 +258,32 @@ pyyaml
## Testing
To prevent Airflow from unexpected collecting and processing testing code, it would be better to keep tests separate from the source code.
```
project
├──energistics/
| ├── dags/
| └── plugins/
└──tests/
└── ...
```
To test the base image it is needed to create and run a testing one.
As long as the `test` and `src` folders are separated, we should make tests see the source code. So it makes sense to add an environmental variable with a path to the tested modules and add this variable's value to `sys.path` in tests.
Build the base image:
```shell
# clone OSDU source code into energistics/src/witsml_parser/osdu
cd energistics/src/witsml_parser/osdu
In the tests, before tests are run, we must set *AIRFLOW_SRC_DIR*:
git clone git@community.opengroup.org:osdu/platform/system/sdks/common-python-sdk.git
git clone git@community.opengroup.org:osdu/platform/data-flow/ingestion/ingestion-dags.git
```
export AIRFLOW_SRC_DIR=/home/yan/work/osdu/osdu/os-dagsv2/ingestion-dags/src
```
# return to root dir
cd -
Then in tests append this variable's value to PYTHONPATH:
# build an image
docker build -t witsml_parser -f build/Dockerfile .
```python
import os
import sys