From d7739ee0234f6d392508e9f5ec0e97c2a9722893 Mon Sep 17 00:00:00 2001 From: Harshit Aggarwal Date: Thu, 16 Sep 2021 20:59:13 +0530 Subject: [PATCH 1/7] init --- deployments/scripts/azure/build_dag.sh | 10 ++-------- deployments/scripts/azure/create_dag.py | 6 ++---- .../scripts/azure/dockerFolder/build_dag_dockerfile | 1 - deployments/scripts/azure/prepare_dags.sh | 12 ++++++------ devops/azure/dev-pipeline.yml | 2 +- 5 files changed, 11 insertions(+), 20 deletions(-) diff --git a/deployments/scripts/azure/build_dag.sh b/deployments/scripts/azure/build_dag.sh index 4b52f531..c92ad45c 100644 --- a/deployments/scripts/azure/build_dag.sh +++ b/deployments/scripts/azure/build_dag.sh @@ -17,6 +17,7 @@ echo "Building Dag -- START "; echo "------------------------------------"; export NAMESPACE=airflow +export BUILD_VERSION=$BUILD_VERSION if [ -z $AZURE_DNS_NAME ]; then echo 'ERROR: AZURE_DNS_NAME not provided' @@ -33,17 +34,10 @@ else fi export SHARED_TENANT -if [ -z $PARSER_IMAGE ]; then - CSV_DOCKER_IMAGE=${AZURE_REGISTRY}.azurecr.io/${CI_PROJECT_NAME}-${CI_COMMIT_REF_SLUG}:${CI_COMMIT_SHA} -else - CSV_DOCKER_IMAGE=${PARSER_IMAGE} -fi -export CSV_DOCKER_IMAGE - echo "AZURE_DNS_NAME: $AZURE_DNS_NAME" echo "WORKFLOW_URL: $WORKFLOW_URL" echo "SHARED_TENANT: $SHARED_TENANT" -echo "CSV_DOCKER_IMAGE: $CSV_DOCKER_IMAGE" +echo "BUILD_VERSION: $BUILD_VERSION" printf "\n" echo "Execute Python Script create_dag.py"; diff --git a/deployments/scripts/azure/create_dag.py b/deployments/scripts/azure/create_dag.py index 4ea7c774..f86e7166 100644 --- a/deployments/scripts/azure/create_dag.py +++ b/deployments/scripts/azure/create_dag.py @@ -8,12 +8,11 @@ from jinja2 import Environment, FileSystemLoader class CreateDag: def __init__(self, file_path): - self.docker_image = os.environ.get('CSV_DOCKER_IMAGE') + self.build_version = os.environ.get("BUILD_VERSION") + self.docker_image = f"{{ var.value.azure_container_registry }}/osdu-csv-parser:{self.build_version}" self.dns_host_name = "{{ var.value.azure_dns_host }}" self.namespace = os.environ.get('NAMESPACE') self.file_path = file_path - self.service_name = self.docker_image[self.docker_image.index("/") + 1: self.docker_image.index(":")] - self.build_version = self.docker_image[self.docker_image.index(":") + 1:] self.dag_name = f"csv-parser-{self.build_version}"[:64] self.env_vars = { @@ -34,7 +33,6 @@ class CreateDag: print(f"docker_image: {self.docker_image}") print(f"file_path: {self.file_path}") - print(f"service_name: {self.service_name}") print(f"build_version: {self.build_version}") print(f"dag_name: {self.dag_name}") print(f"docker_image: {self.docker_image}") diff --git a/deployments/scripts/azure/dockerFolder/build_dag_dockerfile b/deployments/scripts/azure/dockerFolder/build_dag_dockerfile index 6ba50105..3f7b0d29 100644 --- a/deployments/scripts/azure/dockerFolder/build_dag_dockerfile +++ b/deployments/scripts/azure/dockerFolder/build_dag_dockerfile @@ -6,7 +6,6 @@ ARG AZURE_DEPLOYMENTS_SCRIPTS_SUBDIR=/ ENV PYTHONUNBUFFERED=1 \ PATH="/home/osdu/.local/bin:${PATH}" \ - CSV_PARSER_IMAGE=${PARSER_IMAGE} \ SHARED_TENANT=${SHARED_TENANT} \ AZURE_DNS_NAME=$AZURE_DNS_NAME diff --git a/deployments/scripts/azure/prepare_dags.sh b/deployments/scripts/azure/prepare_dags.sh index 378b16ab..f1c522d8 100644 --- a/deployments/scripts/azure/prepare_dags.sh +++ b/deployments/scripts/azure/prepare_dags.sh @@ -29,27 +29,27 @@ if [ "$AZURE_GITLAB_PIPELINE_RUN" == "true" ]; then docker push $CI_REGISTRY_IMAGE/$DAG_IMAGE echo "Pushing to Azure Container Registry" - docker tag $CI_REGISTRY_IMAGE/$PARSER_IMAGE ${AZURE_REGISTRY}.azurecr.io/$PARSER_IMAGE + docker tag $CI_REGISTRY_IMAGE/$PARSER_IMAGE ${AZURE_REGISTRY}.azurecr.io/osdu-csv-parser:$CI_COMMIT_SHA docker tag $CI_REGISTRY_IMAGE/$DAG_IMAGE ${AZURE_REGISTRY}.azurecr.io/$DAG_IMAGE - docker push ${AZURE_REGISTRY}.azurecr.io/$PARSER_IMAGE + docker push ${AZURE_REGISTRY}.azurecr.io/osdu-csv-parser:$BUILD_VERSION docker push ${AZURE_REGISTRY}.azurecr.io/$DAG_IMAGE - export PARSER_IMAGE=${AZURE_REGISTRY}.azurecr.io/$PARSER_IMAGE + export BUILD_VERSION=$CI_COMMIT_SHA export DAG_IMAGE=$CI_REGISTRY_IMAGE/$DAG_IMAGE else echo "Build docker images" - docker build --file ./$AZURE_DEPLOYMENTS_SCRIPTS_SUBDIR/dockerFolder/csv_parser_dockerfile -t $PARSER_IMAGE . + docker build --file ./$AZURE_DEPLOYMENTS_SCRIPTS_SUBDIR/dockerFolder/csv_parser_dockerfile -t ${AZURE_REGISTRY}.azurecr.io/osdu-csv-parser:$BUILD_VERSION . docker build --build-arg AZURE_DEPLOYMENTS_SCRIPTS_SUBDIR=$AZURE_DEPLOYMENTS_SCRIPTS_SUBDIR --file ./$AZURE_DEPLOYMENTS_SCRIPTS_SUBDIR/dockerFolder/build_dag_dockerfile -t $DAG_IMAGE . echo "Pushing to Azure Container Registry" - docker push $PARSER_IMAGE + docker push ${AZURE_REGISTRY}.azurecr.io/osdu-csv-parser:$BUILD_VERSION docker push $DAG_IMAGE fi cat > .env << EOF -PARSER_IMAGE=$PARSER_IMAGE +BUILD_VERSION=$BUILD_VERSION SHARED_TENANT=$SHARED_TENANT AZURE_DNS_NAME=$AZURE_DNS_NAME EOF diff --git a/devops/azure/dev-pipeline.yml b/devops/azure/dev-pipeline.yml index e6c95679..2772dedf 100644 --- a/devops/azure/dev-pipeline.yml +++ b/devops/azure/dev-pipeline.yml @@ -60,7 +60,7 @@ stages: parameters: dockerfilePath: 'deployments/scripts/azure/dockerFolder/output_dags_dockerfile' outputDagFolder: '/home/output_dags' - environmentVars: 'AZURE_REGISTRY=$(CONTAINER_REGISTRY_NAME)${NEWLINE}AZURE_PRINCIPAL_ID=$(AZURE_DEPLOY_CLIENT_ID)${NEWLINE}AZURE_PRINCIPAL_SECRET=$(AZURE_DEPLOY_CLIENT_SECRET)${NEWLINE}AZURE_TENANT_ID=$(AZURE_DEPLOY_TENANT)${NEWLINE}PARSER_IMAGE=$(CONTAINER_REGISTRY_NAME).azurecr.io/csv-parser-dev:$(Build.SourceVersion)${NEWLINE}DAG_IMAGE=$(CONTAINER_REGISTRY_NAME).azurecr.io/csv-parser-dag-dev:$(Build.SourceVersion)${NEWLINE}SHARED_TENANT=$(OSDU_TENANT)${NEWLINE}AZURE_DNS_NAME=$(AZURE_DNS_NAME)${NEWLINE}AZURE_GITLAB_PIPELINE_RUN=false' + environmentVars: 'AZURE_REGISTRY=$(CONTAINER_REGISTRY_NAME)${NEWLINE}AZURE_PRINCIPAL_ID=$(AZURE_DEPLOY_CLIENT_ID)${NEWLINE}AZURE_PRINCIPAL_SECRET=$(AZURE_DEPLOY_CLIENT_SECRET)${NEWLINE}AZURE_TENANT_ID=$(AZURE_DEPLOY_TENANT)${NEWLINE}BUILD_VERSION=$(Build.SourceVersion)${NEWLINE}DAG_IMAGE=$(CONTAINER_REGISTRY_NAME).azurecr.io/csv-parser-dag-dev:$(Build.SourceVersion)${NEWLINE}SHARED_TENANT=$(OSDU_TENANT)${NEWLINE}AZURE_DNS_NAME=$(AZURE_DNS_NAME)${NEWLINE}AZURE_GITLAB_PIPELINE_RUN=false' - template: /devops/dag-pipeline-stages/copy-dag.yml@TemplateRepo parameters: -- GitLab From 556a0fea42ff742896c50c7a0907553b01cf7f79 Mon Sep 17 00:00:00 2001 From: Harshit Aggarwal Date: Fri, 24 Sep 2021 14:19:31 +0530 Subject: [PATCH 2/7] update docker image logic --- airflowdags/csv_ingestion_all_steps.py | 11 +++++++---- deployments/scripts/azure/create_dag.py | 12 ++++-------- 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/airflowdags/csv_ingestion_all_steps.py b/airflowdags/csv_ingestion_all_steps.py index 508fbf80..c81c3b2a 100644 --- a/airflowdags/csv_ingestion_all_steps.py +++ b/airflowdags/csv_ingestion_all_steps.py @@ -29,6 +29,10 @@ run_id = "{{ dag_run.conf['run_id'] }}" data_service_to_use = "{{ dag_run.conf['execution_context'].get('data_service_to_use', 'file') }}" steps = ["LOAD_FROM_CSV", "TYPE_COERCION", "ID", "ACL", "LEGAL", "KIND", "META", "TAGS", "UNIT", "CRS", "RELATIONSHIP", "STORE_TO_OSDU"] +# Get environment variables +# TODO: put env vars here from application.properties +env_vars = {| ENV_VARS or {} |} + # Constants DAG_NAME = "{| DAG_NAME |}" DOCKER_IMAGE = "{| DOCKER_IMAGE |}" @@ -44,12 +48,11 @@ params = { "dataServiceName": "{{ dag_run.conf['execution_context'].get('data_service_to_use', 'file') }}" } -# Get environment variables -# TODO: put env vars here from application.properties -env_vars = {| ENV_VARS or {} |} if data_service_to_use: env_vars["data_service_to_use"] = data_service_to_use +env_vars["docker_image"] = DOCKER_IMAGE + operator_kwargs = {| K8S_POD_OPERATOR_KWARGS or {} |} with DAG(DAG_NAME, default_args=default_args) as dag: @@ -64,7 +67,7 @@ with DAG(DAG_NAME, default_args=default_args) as dag: env_vars=env_vars, arguments=[dumps(params)], is_delete_operator_pod=True, - image=DOCKER_IMAGE, + image=env_vars["docker_image"], **operator_kwargs) update_status_finished = UpdateStatusOperator( diff --git a/deployments/scripts/azure/create_dag.py b/deployments/scripts/azure/create_dag.py index a4b6638d..cb24b85a 100644 --- a/deployments/scripts/azure/create_dag.py +++ b/deployments/scripts/azure/create_dag.py @@ -9,11 +9,9 @@ class CreateDag: def __init__(self, file_path): self.build_version = os.environ.get("BUILD_VERSION") - self.docker_image = f"{{ var.value.azure_container_registry }}/osdu-csv-parser:{self.build_version}" self.dns_host_name = "{{ var.value.azure_dns_host }}" self.namespace = os.environ.get('NAMESPACE') self.dag_folder_path = os.environ.get('CSV_PARSER_DAG_FOLDER_PATH', '../airflowdags/osdu_csv_parser') - self.file_path = file_path self.dag_name = f"csv-parser-{self.build_version}"[:64] @@ -34,16 +32,14 @@ class CreateDag: } self.kubernetes_pod_operator_options = { - "labels": { - "aadpodidbinding": "osdu-identity" + "labels": { + "aadpodidbinding": "osdu-identity" + } } - } - print(f"docker_image: {self.docker_image}") print(f"file_path: {self.file_path}") print(f"build_version: {self.build_version}") print(f"dag_name: {self.dag_name}") - print(f"docker_image: {self.docker_image}") def _render_template(self, file) -> str: env = Environment( @@ -54,7 +50,7 @@ class CreateDag: params = { "DAG_NAME": self.dag_name, - "DOCKER_IMAGE": self.docker_image, + "DOCKER_IMAGE": "{{ var.value.azure_container_registry }}/csv-parser:" + self.build_version, "DNS_HOST": self.dns_host_name, "NAMESPACE": self.namespace, "ENV_VARS": json.dumps(self.env_vars, indent=4), -- GitLab From 20eb2545fd76f1a7e2448fbd5811a755226a865d Mon Sep 17 00:00:00 2001 From: Harshit Aggarwal Date: Fri, 24 Sep 2021 14:34:32 +0530 Subject: [PATCH 3/7] minor fix --- airflowdags/csv_ingestion_all_steps.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/airflowdags/csv_ingestion_all_steps.py b/airflowdags/csv_ingestion_all_steps.py index c81c3b2a..6c23c388 100644 --- a/airflowdags/csv_ingestion_all_steps.py +++ b/airflowdags/csv_ingestion_all_steps.py @@ -29,10 +29,6 @@ run_id = "{{ dag_run.conf['run_id'] }}" data_service_to_use = "{{ dag_run.conf['execution_context'].get('data_service_to_use', 'file') }}" steps = ["LOAD_FROM_CSV", "TYPE_COERCION", "ID", "ACL", "LEGAL", "KIND", "META", "TAGS", "UNIT", "CRS", "RELATIONSHIP", "STORE_TO_OSDU"] -# Get environment variables -# TODO: put env vars here from application.properties -env_vars = {| ENV_VARS or {} |} - # Constants DAG_NAME = "{| DAG_NAME |}" DOCKER_IMAGE = "{| DOCKER_IMAGE |}" @@ -48,6 +44,9 @@ params = { "dataServiceName": "{{ dag_run.conf['execution_context'].get('data_service_to_use', 'file') }}" } +# Get environment variables +# TODO: put env vars here from application.properties +env_vars = {| ENV_VARS or {} |} if data_service_to_use: env_vars["data_service_to_use"] = data_service_to_use -- GitLab From e2f0aba62129e14464a7eb12fface3523876be46 Mon Sep 17 00:00:00 2001 From: Harshit Aggarwal Date: Mon, 27 Sep 2021 15:29:39 +0530 Subject: [PATCH 4/7] PR comments --- deployments/scripts/azure/prepare_dags.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/deployments/scripts/azure/prepare_dags.sh b/deployments/scripts/azure/prepare_dags.sh index f1c522d8..48e713ee 100644 --- a/deployments/scripts/azure/prepare_dags.sh +++ b/deployments/scripts/azure/prepare_dags.sh @@ -29,9 +29,9 @@ if [ "$AZURE_GITLAB_PIPELINE_RUN" == "true" ]; then docker push $CI_REGISTRY_IMAGE/$DAG_IMAGE echo "Pushing to Azure Container Registry" - docker tag $CI_REGISTRY_IMAGE/$PARSER_IMAGE ${AZURE_REGISTRY}.azurecr.io/osdu-csv-parser:$CI_COMMIT_SHA + docker tag $CI_REGISTRY_IMAGE/$PARSER_IMAGE ${AZURE_REGISTRY}.azurecr.io/csv-parser:$CI_COMMIT_SHA docker tag $CI_REGISTRY_IMAGE/$DAG_IMAGE ${AZURE_REGISTRY}.azurecr.io/$DAG_IMAGE - docker push ${AZURE_REGISTRY}.azurecr.io/osdu-csv-parser:$BUILD_VERSION + docker push ${AZURE_REGISTRY}.azurecr.io/csv-parser:$BUILD_VERSION docker push ${AZURE_REGISTRY}.azurecr.io/$DAG_IMAGE export BUILD_VERSION=$CI_COMMIT_SHA @@ -39,11 +39,11 @@ if [ "$AZURE_GITLAB_PIPELINE_RUN" == "true" ]; then else echo "Build docker images" - docker build --file ./$AZURE_DEPLOYMENTS_SCRIPTS_SUBDIR/dockerFolder/csv_parser_dockerfile -t ${AZURE_REGISTRY}.azurecr.io/osdu-csv-parser:$BUILD_VERSION . + docker build --file ./$AZURE_DEPLOYMENTS_SCRIPTS_SUBDIR/dockerFolder/csv_parser_dockerfile -t ${AZURE_REGISTRY}.azurecr.io/csv-parser:$BUILD_VERSION . docker build --build-arg AZURE_DEPLOYMENTS_SCRIPTS_SUBDIR=$AZURE_DEPLOYMENTS_SCRIPTS_SUBDIR --file ./$AZURE_DEPLOYMENTS_SCRIPTS_SUBDIR/dockerFolder/build_dag_dockerfile -t $DAG_IMAGE . echo "Pushing to Azure Container Registry" - docker push ${AZURE_REGISTRY}.azurecr.io/osdu-csv-parser:$BUILD_VERSION + docker push ${AZURE_REGISTRY}.azurecr.io/csv-parser:$BUILD_VERSION docker push $DAG_IMAGE fi -- GitLab From 3801b3d148c7acc01434f9264b7e32afb342aa3b Mon Sep 17 00:00:00 2001 From: Harshit Aggarwal Date: Mon, 27 Sep 2021 16:07:52 +0530 Subject: [PATCH 5/7] PR comments --- deployments/scripts/azure/create_dag.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deployments/scripts/azure/create_dag.py b/deployments/scripts/azure/create_dag.py index cb24b85a..61ca18a9 100644 --- a/deployments/scripts/azure/create_dag.py +++ b/deployments/scripts/azure/create_dag.py @@ -50,7 +50,7 @@ class CreateDag: params = { "DAG_NAME": self.dag_name, - "DOCKER_IMAGE": "{{ var.value.azure_container_registry }}/csv-parser:" + self.build_version, + "DOCKER_IMAGE": "{{ var.value.dag_image_acr }}/csv-parser:" + self.build_version, "DNS_HOST": self.dns_host_name, "NAMESPACE": self.namespace, "ENV_VARS": json.dumps(self.env_vars, indent=4), -- GitLab From 233b29e9797cb7743df8ead6dbf2edcceb1cc65b Mon Sep 17 00:00:00 2001 From: Harshit Aggarwal Date: Tue, 28 Sep 2021 07:05:14 +0530 Subject: [PATCH 6/7] minor fix --- airflowdags/csv_ingestion_all_steps.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/airflowdags/csv_ingestion_all_steps.py b/airflowdags/csv_ingestion_all_steps.py index 6c23c388..508fbf80 100644 --- a/airflowdags/csv_ingestion_all_steps.py +++ b/airflowdags/csv_ingestion_all_steps.py @@ -50,8 +50,6 @@ env_vars = {| ENV_VARS or {} |} if data_service_to_use: env_vars["data_service_to_use"] = data_service_to_use -env_vars["docker_image"] = DOCKER_IMAGE - operator_kwargs = {| K8S_POD_OPERATOR_KWARGS or {} |} with DAG(DAG_NAME, default_args=default_args) as dag: @@ -66,7 +64,7 @@ with DAG(DAG_NAME, default_args=default_args) as dag: env_vars=env_vars, arguments=[dumps(params)], is_delete_operator_pod=True, - image=env_vars["docker_image"], + image=DOCKER_IMAGE, **operator_kwargs) update_status_finished = UpdateStatusOperator( -- GitLab From 6e1651d0516929603fa213d36b09ed1955b013d1 Mon Sep 17 00:00:00 2001 From: Harshit Aggarwal Date: Tue, 28 Sep 2021 09:32:55 +0530 Subject: [PATCH 7/7] docker image name fix --- deployments/scripts/azure/prepare_dags.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deployments/scripts/azure/prepare_dags.sh b/deployments/scripts/azure/prepare_dags.sh index 48e713ee..fb367c0a 100644 --- a/deployments/scripts/azure/prepare_dags.sh +++ b/deployments/scripts/azure/prepare_dags.sh @@ -31,7 +31,7 @@ if [ "$AZURE_GITLAB_PIPELINE_RUN" == "true" ]; then echo "Pushing to Azure Container Registry" docker tag $CI_REGISTRY_IMAGE/$PARSER_IMAGE ${AZURE_REGISTRY}.azurecr.io/csv-parser:$CI_COMMIT_SHA docker tag $CI_REGISTRY_IMAGE/$DAG_IMAGE ${AZURE_REGISTRY}.azurecr.io/$DAG_IMAGE - docker push ${AZURE_REGISTRY}.azurecr.io/csv-parser:$BUILD_VERSION + docker push ${AZURE_REGISTRY}.azurecr.io/csv-parser:$CI_COMMIT_SHA docker push ${AZURE_REGISTRY}.azurecr.io/$DAG_IMAGE export BUILD_VERSION=$CI_COMMIT_SHA -- GitLab