Commit 405ecfed, authored by Vineeth Guna [Microsoft], committed by Kishore Battula
Browse files

Airflow chart changes for multi partition support

parent 2ac6026e
################################################################################
# Azure environment specific values
#
# `dp` and `cr` are two sets of Azure coordinates (presumably the data
# partition and the central resources -- confirm against the provisioning
# docs). The chart templates read them as `.Values.azure.dp.*`.
#
# Every `#{...}#` placeholder has to be substituted before the chart is
# installed. The values are quoted on purpose: an unquoted value that begins
# with `#` after the space is a YAML comment, so the key would be null whenever
# the raw file is parsed or linted, and quoting also keeps the substituted
# value a string.
#
azure:
  dp:
    tenant: "#{data-partition-tenant-id}#"
    subscription: "#{data-partition-subscription-id}#"
    resourcegroup: "#{base-name-dp}#-rg"
    identity: "#{base-name-dp}#-osdu-identity"
    identity_id: "#{management-identity-id}#"
    keyvault: "#{base-name-dp}#-kv"
  cr:
    tenant: "#{tenant-id}#"
    subscription: "#{subscription-id}#"
    resourcegroup: "#{base-name-cr}#-rg"
    keyvault: "#{base-name-cr}#-dpkv"
################################################################################
# Application Insights statsd settings
#
# The binding value is the same string used for the `aadpodidbinding` pod labels
# of the Airflow web, worker and scheduler pods in this file.
#
appinsightstatsd:
  aadpodidbinding: 'osdu-identity'
################################################################################
# Log Analytics settings
#
# Each entry names the Kubernetes secret and the key inside it that holds the
# workspace id / workspace key. The log-processor deployment template reads
# them through `.Values.logAnalytics.<entry>.secretName` and `.secretKey`.
#
logAnalytics:
  workspaceId:
    secretName: 'dp-logging'
    secretKey: 'workspace-id'
  workspaceKey:
    secretName: 'dp-logging'
    secretKey: 'workspace-key'
################################################################################
# Optional override values: container image coordinates
#
# The `#{...}#` placeholders must be substituted before install. They are quoted
# so that (a) the raw file does not parse them as YAML comments and (b) the
# substituted values -- in particular the source version used as the tag --
# always stay strings, even when they happen to look like a number.
#
image:
  repository: "#{container-registry}#.azurecr.io"
  branch: "#{ENVIRONMENT_NAME}#"
  tag: "#{Build.SourceVersion}#"
# Login name for the Airflow UI account.
airflowLogin:
  name: admin

# Airflow admin credentials: the user name, plus the mount path and key name
# under which the password is looked up.
airflowAuthentication:
  username: admin
  keyvaultMountPath: /mnt/azure-keyvault/
  passwordKey: airflow-admin-password
################################################################################
# Custom configs / environment values
#
# `createUser` is deliberately the string "True", not a YAML boolean.
#
customConfig:
  rbac:
    createUser: 'True'
################################################################################
# pgbouncer configuration
#
# `port` (6543) is the port Airflow connects to (see externalDatabase.port in
# this file); `airflowdb` describes the PostgreSQL server behind the pooler.
# Values containing `#{...}#` placeholders are quoted: unquoted, a value that
# starts with `#` is parsed as a YAML comment and the key is left null.
#
pgbouncer:
  enabled: true
  port: 6543
  max_client_connections: 3000
  airflowdb:
    name: airflow
    host: "#{base-name-dp}#-pg.postgres.database.azure.com"
    port: 5432
    pool_size: 100
    user: "osdu_admin@#{base-name-dp}#-pg"
    passwordSecret: "postgres"
    passwordSecretKey: "postgres-password"
################################################################################
# Specify the airflow configuration
#
airflow:
# Read by the chart templates as `.Values.airflow.isDataPartitionDeployment`
# (treated as false when unset) to switch the data-partition specific resources
# and API versions on.
isDataPartitionDeployment: true
##################################
# Kubernetes Pod Operator config
##################################
# NOTE(review): indentation is not visible in this view, so it cannot be told
# whether `serviceAccount` is nested under `kubernetesPodOperator` or is its
# sibling under `airflow` -- confirm against the chart before re-indenting.
kubernetesPodOperator:
namespace: airflow
serviceAccount:
name: airflow
###################################
# Kubernetes - Ingress Configs
###################################
# Exposes the Airflow web UI under /airflow through the istio ingress class,
# with TLS taken from the `osdu-certificate` secret.
ingress:
  enabled: true
  web:
    annotations:
      kubernetes.io/ingress.class: istio
    path: "/airflow"
    # Quoted: an unquoted value starting with `#` is a YAML comment, which would
    # leave `host` null until the placeholder is substituted.
    host: "#{DNS_HOST}#"
    livenessPath: "/airflow/health"
    tls:
      enabled: true
      secretName: osdu-certificate
    precedingPaths:
      - path: "/airflow/*"
        serviceName: airflow-web
        servicePort: 8080
###################################
# Database - External Database
###################################
# The bundled PostgreSQL is switched off; Airflow uses the external database
# described below.
postgresql:
  enabled: false

externalDatabase:
  type: postgres
  # Azure PostgreSQL Database host or pgbouncer host (if pgbouncer is enabled)
  host: airflow-pgbouncer
  # Azure PostgreSQL Database username, formatted as {username}@{hostname}
  user: 'osdu_admin@#{base-name-dp}#-pg'
  passwordSecret: 'postgres'
  passwordSecretKey: 'postgres-password'
  # Same port as pgbouncer.port in this file
  port: 6543
  database: airflow
###################################
# Database - External Redis
###################################
# The bundled Redis is switched off; Airflow uses the external cache below.
redis:
  enabled: false

externalRedis:
  # Azure Redis Cache host. Quoted: an unquoted value starting with `#` is a
  # YAML comment, which would leave `host` null until substitution.
  host: "#{base-name-dp}#-cache.redis.cache.windows.net"
  port: 6380
  passwordSecret: "redis"
  passwordSecretKey: "redis-password"
  # Redis database number, chosen according to the Redis config map:
  # https://community.opengroup.org/osdu/platform/deployment-and-operations/infra-azure-provisioning/-/blob/master/charts/osdu-common/templates/redis-map.yaml#L7
  databaseNumber: 1
###################################
# Airflow - DAGs Configs
###################################
# DAG files come from the existing claim `airflowdagpvc`, sub-path `dags`.
dags:
  installRequirements: true
  persistence:
    enabled: true
    existingClaim: airflowdagpvc
    subPath: 'dags'
###################################
# Airflow - WebUI Configs
###################################
# Single web replica. The pod annotation mounts the `azure-keyvault` volume
# into the istio sidecar as well.
web:
  replicas: 1
  livenessProbe:
    timeoutSeconds: 60
  resources:
    requests:
      cpu: '2000m'
      memory: '2Gi'
    limits:
      cpu: '3000m'
      memory: '2Gi'
  podLabels:
    aadpodidbinding: 'osdu-identity'
  podAnnotations:
    sidecar.istio.io/userVolumeMount: '[{"name": "azure-keyvault", "mountPath": "/mnt/azure-keyvault", "readonly": true}]'
  baseUrl: 'http://localhost/airflow'
###################################
# Airflow - Worker Configs
###################################
# Requests equal limits for workers; istio sidecar injection is turned off.
workers:
  resources:
    requests:
      cpu: '1200m'
      memory: '5Gi'
    limits:
      cpu: '1200m'
      memory: '5Gi'
  podLabels:
    aadpodidbinding: 'osdu-identity'
  podAnnotations:
    sidecar.istio.io/inject: 'false'
  autoscaling:
    enabled: false
    # minReplicas is picked from Values.workers.replicas; its default is 1
    maxReplicas: 3
    metrics:
      - type: Resource
        resource:
          name: memory
          target:
            type: Utilization
            averageUtilization: 60
  labels:
    # DO NOT DELETE THIS LABEL. Keep it in step with autoscaling.enabled:
    # "false" while autoscaling is disabled, "true" once it is enabled.
    autoscalingEnabled: 'false'
###################################
# Airflow - Flower Configs
###################################
# Flower is not deployed.
flower:
  enabled: false
###################################
# Airflow - Scheduler Configs
###################################
# Requests equal limits for the scheduler; istio sidecar injection is off.
# `variables` is an empty JSON object supplied as a literal block string.
scheduler:
  resources:
    requests:
      cpu: '3000m'
      memory: '1Gi'
    limits:
      cpu: '3000m'
      memory: '1Gi'
  podLabels:
    aadpodidbinding: 'osdu-identity'
  podAnnotations:
    sidecar.istio.io/inject: 'false'
  variables: |
    {}
###################################
# Airflow - Common Configs
###################################
# Image, Airflow configuration (AIRFLOW__SECTION__KEY entries), extra
# environment variables, pip packages and volumes for the Airflow pods.
# The statsd config map template reads
# `.Values.airflow.airflow.config.AIRFLOW__SCHEDULER__STATSD_PREFIX`.
airflow:
  image:
    repository: community.opengroup.org:5555/osdu/platform/deployment-and-operations/base-containers-azure/airflow-docker-image/initial-checkin
    # NOTE(review): a floating `latest` tag together with `IfNotPresent` lets a
    # node keep running whatever `latest` it cached earlier -- consider pinning.
    tag: latest
    pullPolicy: IfNotPresent
    pullSecret: ""
  config:
    AIRFLOW__SCHEDULER__STATSD_ON: "True"
    AIRFLOW__SCHEDULER__STATSD_HOST: "appinsights-statsd"
    AIRFLOW__SCHEDULER__STATSD_PORT: 8125
    AIRFLOW__SCHEDULER__STATSD_PREFIX: "osdu_airflow.#{base-name-dp}#"
    AIRFLOW__CORE__DAGS_ARE_PAUSED_AT_CREATION: "False"
    # Enable for debug purposes only
    AIRFLOW__WEBSERVER__EXPOSE_CONFIG: "False"
    AIRFLOW__WEBSERVER__AUTHENTICATE: "True"
    AIRFLOW__WEBSERVER__AUTH_BACKEND: "airflow.contrib.auth.backends.password_auth"
    AIRFLOW__WEBSERVER__RBAC: "True"
    AIRFLOW__API__AUTH_BACKEND: "airflow.api.auth.backend.default"
    AIRFLOW__CORE__REMOTE_LOGGING: "True"
    AIRFLOW__CORE__REMOTE_LOG_CONN_ID: "az_log"
    AIRFLOW__CORE__REMOTE_BASE_LOG_FOLDER: "wasb-airflowlog"
    AIRFLOW__CORE__LOGGING_CONFIG_CLASS: "log_config.DEFAULT_LOGGING_CONFIG"
    AIRFLOW__CORE__LOG_FILENAME_TEMPLATE: "{{ run_id }}/{{ ti.dag_id }}/{{ ti.task_id }}/{{ ts }}/{% if dag_run.conf is not none and 'correlation_id' in dag_run.conf %}{{ dag_run.conf['correlation_id'] }}{% else %}None{% endif %}/{{ try_number }}.log"
    AIRFLOW__CELERY__SSL_ACTIVE: "True"
    AIRFLOW__WEBSERVER__ENABLE_PROXY_FIX: "True"
    AIRFLOW__CORE__PLUGINS_FOLDER: "/opt/airflow/plugins"
    AIRFLOW__SCHEDULER__DAG_DIR_LIST_INTERVAL: 60
    AIRFLOW__CORE__LOGGING_LEVEL: DEBUG
    AIRFLOW_VAR_CORE__CONFIG__DATALOAD_CONFIG_PATH: "/opt/airflow/dags/configs/dataload.ini"
    AIRFLOW_VAR_CORE__SERVICE__SCHEMA__URL: "https://#{OSDU_SVC_ENDPOINT}#/api/schema-service/v1/schema"
    AIRFLOW_VAR_CORE__SERVICE__SEARCH__URL: "https://#{OSDU_SVC_ENDPOINT}#/api/search/v2/query"
    AIRFLOW_VAR_CORE__SERVICE__STORAGE__URL: "https://#{OSDU_SVC_ENDPOINT}#/api/storage/v2/records"
    AIRFLOW_VAR_CORE__SERVICE__FILE__HOST: "https://#{OSDU_SVC_ENDPOINT}#/api/file/v2"
    AIRFLOW_VAR_CORE__SERVICE__WORKFLOW__HOST: "https://#{OSDU_SVC_ENDPOINT}#/api/workflow"
    AIRFLOW_VAR_CORE__SERVICE__SEARCH_WITH_CURSOR__URL: "https://#{OSDU_SVC_ENDPOINT}#/api/search/v2/query_with_cursor"
    AIRFLOW__WEBSERVER__WORKERS: 15
    AIRFLOW__WEBSERVER__WORKER_REFRESH_BATCH_SIZE: 0
    # The two flags below are quoted like every other boolean flag in this map;
    # a bare `True` is a YAML boolean, not the string "True".
    # Whether to serialise DAGs and persist them in the DB
    AIRFLOW__CORE__STORE_SERIALIZED_DAGS: "True"
    # Whether to persist DAG file code in the DB
    AIRFLOW__CORE__STORE_DAG_CODE: "True"
    AIRFLOW__WEBSERVER__WORKER_CLASS: gevent
  extraEnv:
    - name: CLOUD_PROVIDER
      value: "azure"
    - name: AIRFLOW_VAR_KEYVAULT_URI
      value: "https://#{base-name-dp}#-kv.vault.azure.net/"
    - name: AIRFLOW__CORE__FERNET_KEY
      valueFrom:
        secretKeyRef:
          name: airflow
          key: fernet-key
    - name: AIRFLOW_CONN_AZ_LOG
      valueFrom:
        secretKeyRef:
          name: airflow
          key: remote-log-connection
    - name: AIRFLOW_VAR_AZURE_TENANT_ID
      value: "#{AZURE_TENANT_ID}#"
    - name: AIRFLOW_VAR_AZURE_CLIENT_ID
      value: "#{AZURE_CLIENT_ID}#"
    # NOTE(review): this secret is injected as a plain value while the other
    # secrets in this list use secretKeyRef -- consider sourcing it the same way.
    - name: AIRFLOW_VAR_AZURE_CLIENT_SECRET
      value: "#{AZURE_CLIENT_SECRET}#"
    - name: AIRFLOW_VAR_AAD_CLIENT_ID
      value: "#{AAD_CLIENT_ID}#"
    - name: AIRFLOW_VAR_APPINSIGHTS_KEY
      valueFrom:
        secretKeyRef:
          name: central-logging
          key: appinsights
  extraConfigmapMounts:
    - name: remote-log-config
      mountPath: /opt/airflow/config
      configMap: airflow-remote-log-config
      readOnly: true
  extraPipPackages:
    - "flask-bcrypt==0.7.1"
    - "apache-airflow[statsd]"
    - "apache-airflow[kubernetes]"
    - "apache-airflow-backport-providers-microsoft-azure==2021.2.5"
    - "dataclasses==0.8"
    - "google-cloud-storage"
    - "python-keycloak==0.24.0"
    - "msal==1.9.0"
    - "azure-identity==1.5.0"
    - "azure-keyvault-secrets==4.2.0"
    - "azure-storage-blob"
    - "azure-servicebus==7.0.1"
    - "toposort==1.6"
    - "strict-rfc3339==0.7"
    - "https://azglobalosdutestlake.blob.core.windows.net/pythonsdk/osdu_api-0.0.4.tar.gz"
  extraVolumeMounts:
    - name: azure-keyvault
      mountPath: "/mnt/azure-keyvault"
      readOnly: true
    # Plugins are served from the `plugins` sub-path of the DAGs volume
    - name: dags-data
      mountPath: /opt/airflow/plugins
      subPath: plugins
  extraVolumes:
    - name: azure-keyvault
      csi:
        driver: secrets-store.csi.k8s.io
        readOnly: true
        volumeAttributes:
          secretProviderClass: azure-keyvault
......@@ -4,6 +4,17 @@
appinsightstatsd:
aadpodidbinding: "osdu-identity"
#################################################################################
# Specify log analytics configuration
#
logAnalytics:
workspaceId:
secretName: "central-logging"
secretKey: "workspace-id"
workspaceKey:
secretName: "central-logging"
secretKey: "workspace-key"
################################################################################
# Specify any optional override values
#
......@@ -50,6 +61,15 @@ pgbouncer:
#
airflow:
##################################
# Kubernetes Pod Operator config
##################################
kubernetesPodOperator:
namespace: airflow
serviceAccount:
name: airflow
###################################
# Kubernetes - Ingress Configs
###################################
......
{{- $isEnabled := .Values.airflow.isDataPartitionDeployment | default false -}}
apiVersion: apps/v1
kind: Deployment
metadata:
name: airflow-log-processor
namespace: osdu
labels:
app: airflow-log-processor
spec:
......@@ -29,13 +29,13 @@ spec:
- name: AzureLogWorkspaceCustomerId
valueFrom:
secretKeyRef:
name: central-logging
key: workspace-id
name: {{ .Values.logAnalytics.workspaceId.secretName }}
key: {{ .Values.logAnalytics.workspaceId.secretKey }}
- name: AzureLogWorkspaceSharedKey
valueFrom:
secretKeyRef:
name: central-logging
key: workspace-key
name: {{ .Values.logAnalytics.workspaceKey.secretName }}
key: {{ .Values.logAnalytics.workspaceKey.secretKey }}
- name: AzureWebJobsStorage
valueFrom:
secretKeyRef:
......@@ -43,24 +43,33 @@ spec:
key: storage-connection
---
# Pod-identity based trigger authentication for the queue scaler.
# Data-partition deployments use the keda.sh API group, all others keda.k8s.io.
apiVersion: {{ $isEnabled | ternary "keda.sh/v1alpha1" "keda.k8s.io/v1alpha1" }}
kind: TriggerAuthentication
metadata:
  name: azure-queue-auth
  namespace: osdu
spec:
  podIdentity:
    provider: azure
---
apiVersion: keda.k8s.io/v1alpha1
kind: ScaledObject
metadata:
name: azure-queue-scaledobject
namespace: osdu
{{- if $isEnabled }}
apiVersion: keda.sh/v1alpha1
spec:
scaleTargetRef:
name: airflow-log-processor
{{- else }}
apiVersion: keda.k8s.io/v1alpha1
spec:
scaleTargetRef:
deploymentName: azurequeue-function
deploymentName: airflow-log-processor
{{- end }}
triggers:
- type: azure-queue
metadata:
......
......@@ -7,7 +7,7 @@ data:
{
backends: ["appinsights-statsd"],
aiInstrumentationKey: process.env.APPLICATION_INSIGHTS_INSTRUMENTATION_KEY,
aiPrefix: "osdu_airflow",
aiPrefix: "{{ .Values.airflow.airflow.config.AIRFLOW__SCHEDULER__STATSD_PREFIX }}",
aiRoleName: "airflow",
aiRoleInstance: process.env.HOSTNAME,
aiTrackStatsDMetrics: true,
......
{{- $isEnabled := .Values.airflow.isDataPartitionDeployment | default false -}}
apiVersion: apps/v1
kind: Deployment
metadata:
......@@ -24,6 +25,11 @@ spec:
volumeMounts:
- name: config-volume
mountPath: /usr/src/app/statsd/backends/config
{{- if $isEnabled }}
- name: azure-cr-keyvault
mountPath: "/mnt/azure-cr-keyvault"
readOnly: true
{{- end }}
env:
- name: APPLICATION_INSIGHTS_INSTRUMENTATION_KEY
valueFrom:
......@@ -43,4 +49,13 @@ spec:
volumes:
- name: config-volume
configMap:
name: airflow-appinsight-statsd-config
\ No newline at end of file
name: airflow-appinsight-statsd-config
{{- if $isEnabled }}
- name: azure-cr-keyvault
csi:
driver: secrets-store.csi.k8s.io
readOnly: true
volumeAttributes:
secretProviderClass: azure-cr-keyvault
{{- end }}
\ No newline at end of file
{{- $isEnabled := .Values.airflow.isDataPartitionDeployment | default false -}}
{{- $prefixNamespaceString := printf "%s-" .Release.Namespace }}
apiVersion: v1
kind: PersistentVolume
metadata:
name: airflowdagpv
name: {{ $isEnabled | ternary $prefixNamespaceString "" }}airflowdagpv
labels:
usage: airflow-dag
usage: {{ $isEnabled | ternary $prefixNamespaceString "" }}airflow-dag
spec:
capacity:
storage: 5Gi
......@@ -38,4 +40,4 @@ spec:
storage: 5Gi
selector:
matchLabels:
usage: airflow-dag
\ No newline at end of file
usage: {{ $isEnabled | ternary $prefixNamespaceString "" }}airflow-dag
\ No newline at end of file
{{- $dataPartition := .Values.airflow.isDataPartitionDeployment | default false -}}
{{ if $dataPartition }}
# Rendered only for data-partition deployments.
# Declares the user-assigned managed identity built from .Values.azure.dp.*
apiVersion: aadpodidentity.k8s.io/v1
kind: AzureIdentity
metadata:
  name: osdu-identity
spec:
  type: 0
  resourceID: "/subscriptions/{{ .Values.azure.dp.subscription }}/resourcegroups/{{ .Values.azure.dp.resourcegroup }}/providers/Microsoft.ManagedIdentity/userAssignedIdentities/{{ .Values.azure.dp.identity }}"
  clientID: "{{ .Values.azure.dp.identity_id }}"
---
# Binds that identity to the selector `osdu-identity`, the same string used for
# the `aadpodidbinding` pod labels in the values file.
apiVersion: aadpodidentity.k8s.io/v1
kind: AzureIdentityBinding
metadata:
  name: osdu-identity-binding
spec:
  azureIdentity: osdu-identity
  selector: osdu-identity
{{ end }}
{{- $isEnabled := .Values.airflow.isDataPartitionDeployment | default false -}}
{{ if $isEnabled }}
apiVersion: secrets-store.csi.x-k8s.io/v1alpha1
kind: SecretProviderClass
metadata:
name: azure-keyvault
spec:
provider: azure
secretObjects:
- secretName: active-directory
type: Opaque
data:
- objectName: "app-dev-sp-tenant-id"
key: tenantid
- secretName: airflow
type: Opaque
data:
- objectName: airflow-storage
key: storage-account
- objectName: airflow-storage-key
key: storage-key
- objectName: airflow-storage-connection
key: storage-connection
- objectName: airflow-remote-log-connection
key: remote-log-connection
- objectName: airflow-admin-password
key: admin-password
- objectName: airflow-fernet-key
key: fernet-key
- objectName: airflow-storage
key: azurestorageaccountname
- objectName: airflow-storage-key
key: azurestorageaccountkey
- secretName: postgres
type: Opaque
data:
- objectName: "postgres-password"
key: postgres-password
- secretName: redis
type: Opaque
data:
- objectName: "redis-password"
key: redis-password
- secretName: dp-logging
type: Opaque
data:
- objectName: "log-workspace-id"
key: workspace-id
- objectName: "log-workspace-key"
key: workspace-key
parameters:
usePodIdentity: "true"
useVMManagedIdentity: "false"
userAssignedIdentityID: ""
resourceGroup: "{{ .Values.azure.dp.resourcegroup }}"
keyvaultName: "{{ .Values.azure.dp.keyvault }}"
subscriptionId: "{{ .Values.azure.dp.subscription }}"
tenantId: "{{ .Values.azure.dp.tenant }}"
objects: |
array:
- |
objectName: airflow-storage
objectType: secret
- |
objectName: airflow-storage-connection
objectType: secret
- |
objectName: airflow-remote-log-connection
objectType: secret
- |
objectName: airflow-storage-key
objectType: secret
- |
objectName: airflow-admin-password
objectType: secret
- |
objectName: airflow-fernet-key
objectType: secret
- |
objectName: postgres-password
objectType: secret
- |
objectName: redis-password
objectType: secret
- |
objectName: log-workspace-id
objectType: secret
- |
objectName: log-workspace-key
objectType: secret
- |
objectName: app-dev-sp-tenant-id
objectType: secret