Commit 89c7362e authored by harshit aggarwal's avatar harshit aggarwal
Browse files

Merge branch 'airflow-worker-autoscaling' into 'master'

Added autoscaling configuration for airflow workers

See merge request !419
parents 2c3159a3 4b1061fb
Pipeline #54993 passed with stages
in 1 minute and 13 seconds
CHANGELOG.md
(.*/)?\.envoy-airflow-authentication$
.*\.http$
charts/airflow/charts/CHANGELOG.md
\ No newline at end of file
# CHANGELOG
## [90cdb1bc1597f74b34ec2a2fd369b460d9fc3251](https://community.opengroup.org/osdu/platform/deployment-and-operations/infra-azure-provisioning/-/commit/90cdb1bc1597f74b34ec2a2fd369b460d9fc3251)
- Fixed graceful termination script in `templates/configmap-scripts.yaml` under `graceful-stop-celery-worker.sh` where the `celery inspect` command is wrapped with `$()` to avoid syntax error if redis password contains special characters
Below is the change in `templates/configmap-scripts.yaml` under `graceful-stop-celery-worker.sh`
**Actual code**
```bash
while (( celery inspect --broker $AIRFLOW__CELERY__BROKER_URL --destination celery@$HOSTNAME --json active | python3 -c "import json; active_tasks = json.loads(input())['celery@$HOSTNAME']; print(len(active_tasks))" > 0 )); do
sleep 10
done
```
**Changed code**
```bash
while (( $(celery inspect --broker $AIRFLOW__CELERY__BROKER_URL --destination celery@$HOSTNAME --json active | python3 -c "import json; active_tasks = json.loads(input())['celery@$HOSTNAME']; print(len(active_tasks))") > 0 )); do
sleep 10
done
```
\ No newline at end of file
## Airflow Helm Chart
This is the base Airflow Helm chart provided by the Apache community to install Airflow components in Kubernetes.
### How to make changes to airflow helm chart?
- Untar the file by running this command - `tar -xvf airflow-7.5.0.tgz`
- The above step will extract the folder `airflow`
- Make the required changes to the files in this folder
- Delete the existing tar file by running this command - `rm -rf airflow-7.5.0.tgz`
- Create a tar file out of above folder by running this command - `tar -cvzf airflow-7.5.0.tgz airflow/`
- Add the change description to the CHANGELOG [here](./CHANGELOG.md)
......@@ -67,6 +67,11 @@ pgbouncer:
passwordSecret: "postgres"
passwordSecretKey: "postgres-password"
################################################################################
# Specify KEDA configuration
#
keda:
version_2_enabled: false
################################################################################
# Specify the airflow configuration
......@@ -179,17 +184,15 @@ airflow:
aadpodidbinding: "osdu-identity"
podAnnotations:
sidecar.istio.io/inject: "false"
autoscaling:
autoscale:
enabled: false
## minReplicas is picked from Values.workers.replicas and default value is 1
maxReplicas: 3
metrics:
- type: Resource
resource:
name: memory
target:
type: Utilization
averageUtilization: 60
minReplicas: 2
maxReplicas: 20
scaleDown:
coolDownPeriod: 300
celery:
gracefullTermination: true
gracefullTerminationPeriod: 600
labels:
# DO NOT DELETE THIS LABEL. SET IT TO "false" WHEN AUTOSCALING IS DISABLED, SET IT TO "true" WHEN AUTOSCALING IS ENABLED
autoscalingEnabled: "false"
......@@ -261,6 +264,7 @@ airflow:
AIRFLOW__CORE__STORE_SERIALIZED_DAGS: True #This flag decides whether to serialise DAGs and persist them in DB
AIRFLOW__CORE__STORE_DAG_CODE: True #This flag decides whether to persist DAG files code in DB
AIRFLOW__WEBSERVER__WORKER_CLASS: gevent
AIRFLOW__CELERY__WORKER_CONCURRENCY: 16
extraEnv:
- name: CLOUD_PROVIDER
value: "azure"
......@@ -286,11 +290,17 @@ airflow:
value: "#{AAD_CLIENT_ID}#"
- name: AIRFLOW_VAR_APPINSIGHTS_KEY
value: "#{appinsights-key}#"
- name: PYTHONPATH
value: "/opt/celery"
extraConfigmapMounts:
- name: remote-log-config
mountPath: /opt/airflow/config
configMap: airflow-remote-log-config
readOnly: true
- name: celery-config
mountPath: /opt/celery
configMap: celery-config
readOnly: true
extraPipPackages: [
"flask-bcrypt==0.7.1",
"apache-airflow[statsd]",
......
......@@ -55,6 +55,12 @@ pgbouncer:
passwordSecret: "postgres"
passwordSecretKey: "postgres-password"
################################################################################
# Specify KEDA configuration
#
keda:
version_2_enabled: false
################################################################################
# Specify the airflow configuration
......@@ -172,17 +178,15 @@ airflow:
memory: "5Gi"
podLabels:
aadpodidbinding: "osdu-identity"
autoscaling:
autoscale:
enabled: false
## minReplicas is picked from Values.workers.replicas and default value is 1
maxReplicas: 3
metrics:
- type: Resource
resource:
name: memory
target:
type: Utilization
averageUtilization: 60
minReplicas: 2
maxReplicas: 20
scaleDown:
coolDownPeriod: 300
celery:
gracefullTermination: true
gracefullTerminationPeriod: 600
labels:
# DO NOT DELETE THIS LABEL. SET IT TO "false" WHEN AUTOSCALING IS DISABLED, SET IT TO "true" WHEN AUTOSCALING IS ENABLED
autoscalingEnabled: "false"
......@@ -252,6 +256,7 @@ airflow:
AIRFLOW__CORE__STORE_SERIALIZED_DAGS: True #This flag decides whether to serialise DAGs and persist them in DB
AIRFLOW__CORE__STORE_DAG_CODE: True #This flag decides whether to persist DAG files code in DB
AIRFLOW__WEBSERVER__WORKER_CLASS: gevent
AIRFLOW__CELERY__WORKER_CONCURRENCY: 16 # Do not remove this config as it is used for autoscaling as well
extraEnv:
- name: CLOUD_PROVIDER
value: "azure"
......@@ -295,11 +300,17 @@ airflow:
secretKeyRef:
name: central-logging
key: appinsights
- name: PYTHONPATH
value: "/opt/celery"
extraConfigmapMounts:
- name: remote-log-config
mountPath: /opt/airflow/config
configMap: airflow-remote-log-config
readOnly: true
- name: celery-config
mountPath: /opt/celery
configMap: celery-config
readOnly: true
extraPipPackages: [
"flask-bcrypt==0.7.1",
"apache-airflow[statsd]",
......
{{- if and .Values.airflow.workers.autoscale.enabled .Values.keda.version_2_enabled -}}
# KEDA v2 ScaledObject for the Airflow Celery workers.
# Scales the worker StatefulSet between minReplicaCount and maxReplicaCount based
# on the number of running/queued task instances in the Airflow metadata database.
# Rendered only when both worker autoscaling and KEDA v2 are enabled in values.
apiVersion: keda.sh/v1alpha1
kind: ScaledObject
metadata:
  # We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
  name: {{ printf "%s" .Release.Name | trunc 63 | trimSuffix "-" }}-worker
spec:
  scaleTargetRef:
    kind: StatefulSet
    # Must match the worker StatefulSet name produced by the airflow chart for this release.
    name: {{ printf "%s" .Release.Name | trunc 63 | trimSuffix "-" }}-worker
  # Seconds between successive evaluations of the trigger query.
  pollingInterval: 30
  minReplicaCount: {{ .Values.airflow.workers.autoscale.minReplicas }}
  maxReplicaCount: {{ .Values.airflow.workers.autoscale.maxReplicas }}
  advanced:
    horizontalPodAutoscalerConfig:
      behavior:
        scaleUp:
          stabilizationWindowSeconds: 120
          policies:
            # Allow adding up to 900% of the current replica count every 30s (fast ramp-up).
            - type: Percent
              value: 900
              periodSeconds: 30
        scaleDown:
          stabilizationWindowSeconds: 300
          policies:
            # Remove at most 30% of replicas per cool-down period (gradual scale-in).
            - type: Percent
              value: 30
              periodSeconds: {{ .Values.airflow.workers.autoscale.scaleDown.coolDownPeriod }}
  triggers:
    # Desired replica count = ceil(active tasks / per-worker Celery concurrency),
    # read from the Airflow metadata DB either via pgbouncer or directly.
    - type: postgresql
      metadata:
        {{- if eq (.Values.pgbouncer.enabled | default false) true }}
        host: {{ .Values.pgbouncer.airflowdb.host | quote }}
        userName: {{ .Values.pgbouncer.airflowdb.user | quote }}
        port: {{ .Values.pgbouncer.airflowdb.port | quote }}
        dbName: {{ .Values.pgbouncer.airflowdb.name | quote }}
        {{- else }}
        host: {{ .Values.airflow.externalDatabase.host | quote }}
        userName: {{ .Values.airflow.externalDatabase.user | quote }}
        port: {{ .Values.airflow.externalDatabase.port | quote }}
        dbName: {{ .Values.airflow.externalDatabase.database | quote }}
        {{- end}}
        # NOTE(review): passwordFromEnv resolves against the scale target's container
        # environment — confirm DATABASE_PASSWORD is set on the worker StatefulSet.
        passwordFromEnv: DATABASE_PASSWORD
        sslmode: "require"
        targetQueryValue: "1"
        query: "SELECT ceil(COUNT(*)::decimal / {{ .Values.airflow.airflow.config.AIRFLOW__CELERY__WORKER_CONCURRENCY }}) FROM task_instance WHERE state='running' OR state='queued'"
        metricName: "active_tasks_count"
{{- end }}
\ No newline at end of file
# ConfigMap mounted at /opt/celery (see extraConfigmapMounts in values) and put
# on PYTHONPATH so the workers pick up this Celery configuration module.
apiVersion: v1
kind: ConfigMap
metadata:
  name: celery-config
data:
  # Empty marker file — presumably makes /opt/celery importable; confirm the
  # consumer actually needs package semantics rather than plain module import.
  __init__.py: ""
  # Celery broker TLS settings: no client cert/key, but require and verify the
  # broker's server certificate (ssl.CERT_REQUIRED).
  celeryconfig.py: |
    import ssl
    broker_use_ssl = {'ssl_keyfile': '', 'ssl_certfile': '', 'ssl_ca_certs': '', 'ssl_cert_reqs': ssl.CERT_REQUIRED}
......@@ -20,7 +20,7 @@ locals {
helm_keda_name = "keda"
helm_keda_ns = "keda"
helm_keda_repo = "https://kedacore.github.io/charts"
helm_keda_version = "2.1.0"
helm_keda_version = "2.2.0"
}
resource "kubernetes_namespace" "keda" {
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment