From 4424bdc62905ab8a29be274c62edd4c44ae5e534 Mon Sep 17 00:00:00 2001 From: "Vadzim Beuzo [EPAM / GCP]" <vadzim_beuzo@epam.com> Date: Tue, 15 Oct 2024 11:31:28 +0000 Subject: [PATCH] GONRG-10266-Bring-Istio-Autoscaling-to-core-services-Part3 --- devops/gc/deploy/README.md | 25 +++++++++++++ devops/gc/deploy/templates/hpa.yaml | 43 ++++++++++++++++++++++ devops/gc/deploy/templates/rate-limit.yaml | 43 ++++++++++++++++++++++ devops/gc/deploy/values.yaml | 19 ++++++++++ 4 files changed, 130 insertions(+) create mode 100644 devops/gc/deploy/templates/hpa.yaml create mode 100644 devops/gc/deploy/templates/rate-limit.yaml diff --git a/devops/gc/deploy/README.md b/devops/gc/deploy/README.md index c550a55df..f1fc3c4b3 100644 --- a/devops/gc/deploy/README.md +++ b/devops/gc/deploy/README.md @@ -34,6 +34,8 @@ First you need to set variables in **values.yaml** file using any code editor. S **global.onPremEnabled** | whether on-prem is enabled | boolean | `false` | yes **global.limitsEnabled** | whether CPU and memory limits are enabled | boolean | `true` | yes **global.logLevel** | severity of logging level | string | `ERROR` | yes +**global.tier** | deployment tier; autoscaling and rate limiting are enabled only when set to `PROD` | string | `""` | no +**global.autoscaling** | enables horizontal pod autoscaling and Istio rate limiting; takes effect only when `global.tier` is `PROD` | boolean | `true` | yes ### Configmap variables @@ -94,6 +96,29 @@ First you need to set variables in **values.yaml** file using any code editor.
S **istio.bootstrapProxyCPU** | CPU request for Envoy sidecars | string | `10m` | yes **istio.bootstrapProxyCPULimit** | CPU limit for Envoy sidecars | string | `100m` | yes +### Horizontal Pod Autoscaling (HPA) variables (applied only if `global.tier` is `PROD` and `global.autoscaling` is `true`) + +| Name | Description | Type | Default | Required | +|------|-------------|------|---------|---------| +**hpa.minReplicas** | minimum number of replicas | integer | `6` | only if `global.autoscaling` is true and `global.tier` is PROD +**hpa.maxReplicas** | maximum number of replicas | integer | `15` | only if `global.autoscaling` is true and `global.tier` is PROD +**hpa.targetType** | metric target type: `AverageValue` or `Value` | string | `AverageValue` | only if `global.autoscaling` is true and `global.tier` is PROD +**hpa.targetValue** | threshold value of the metric that triggers scaling up | integer | `120` | only if `global.autoscaling` is true and `global.tier` is PROD +**hpa.behaviorScaleUpStabilizationWindowSeconds** | stabilization window, in seconds, before a triggered scale up is applied | integer | `10` | only if `global.autoscaling` is true and `global.tier` is PROD +**hpa.behaviorScaleUpPoliciesValue** | maximum number of replicas to add, as a percentage of the current replica count | integer | `50` | only if `global.autoscaling` is true and `global.tier` is PROD +**hpa.behaviorScaleUpPoliciesPeriodSeconds** | period, in seconds, over which the scale-up policy limit applies | integer | `15` | only if `global.autoscaling` is true and `global.tier` is PROD +**hpa.behaviorScaleDownStabilizationWindowSeconds** | stabilization window, in seconds, before a triggered scale down is applied | integer | `60` | only if `global.autoscaling` is true and `global.tier` is PROD +**hpa.behaviorScaleDownPoliciesValue** | maximum number of replicas to remove, as a percentage of the current replica count | integer | `25` | only if `global.autoscaling` is true and `global.tier` is PROD +**hpa.behaviorScaleDownPoliciesPeriodSeconds** | period, in seconds, over which the scale-down policy limit applies |
integer | `60` | only if `global.autoscaling` is true and `global.tier` is PROD + +### Limits variables + +| Name | Description | Type | Default |Required | +|------|-------------|------|---------|---------| +**local_ratelimit.max_tokens** | maximum number of requests per `fill_interval` | integer | `70` | only if `global.autoscaling` is true and `global.tier` is PROD +**local_ratelimit.tokens_per_fill** | number of new tokens allowed every `fill_interval` | integer | `70` | only if `global.autoscaling` is true and `global.tier` is PROD +**local_ratelimit.fill_interval** | time interval between token refills | string | `1s` | only if `global.autoscaling` is true and `global.tier` is PROD + ### Install the helm chart Run this command from within this directory: diff --git a/devops/gc/deploy/templates/hpa.yaml b/devops/gc/deploy/templates/hpa.yaml new file mode 100644 index 000000000..c7dd90469 --- /dev/null +++ b/devops/gc/deploy/templates/hpa.yaml @@ -0,0 +1,43 @@ +{{- if and (eq .Values.global.tier "PROD") (eq .Values.global.autoscaling true) }} +apiVersion: autoscaling/v2 +kind: HorizontalPodAutoscaler +metadata: + name: {{ printf "%s-hpa" .Values.conf.appName | quote }} + namespace: {{ .Release.Namespace | quote }} +spec: + scaleTargetRef: + apiVersion: apps/v1 + kind: Deployment + name: {{ .Values.conf.appName | quote }} + minReplicas: {{ .Values.hpa.minReplicas }} + maxReplicas: {{ .Values.hpa.maxReplicas }} + metrics: + - type: External + external: + metric: + name: istio.io|service|server|request_count + selector: + matchLabels: + metric.labels.destination_workload_name: {{ .Values.conf.appName | quote }} + # scaling threshold for the external metric; averageValue or value is rendered depending on hpa.targetType + target: + type: {{ .Values.hpa.targetType | quote }} + {{- if eq .Values.hpa.targetType "AverageValue" }} + averageValue: {{ .Values.hpa.targetValue }} + {{- else if eq .Values.hpa.targetType "Value" }} + value: {{ .Values.hpa.targetValue }} + {{- end }} + behavior: + scaleUp: + stabilizationWindowSeconds: {{ .Values.hpa.behaviorScaleUpStabilizationWindowSeconds }} + policies: + - type: Percent + value:
{{ .Values.hpa.behaviorScaleUpPoliciesValue }} + periodSeconds: {{ .Values.hpa.behaviorScaleUpPoliciesPeriodSeconds }} + scaleDown: # same shape as scaleUp, driven by the hpa.behaviorScaleDown* values + stabilizationWindowSeconds: {{ .Values.hpa.behaviorScaleDownStabilizationWindowSeconds }} + policies: + - type: Percent + value: {{ .Values.hpa.behaviorScaleDownPoliciesValue }} + periodSeconds: {{ .Values.hpa.behaviorScaleDownPoliciesPeriodSeconds }} +{{- end }} diff --git a/devops/gc/deploy/templates/rate-limit.yaml b/devops/gc/deploy/templates/rate-limit.yaml new file mode 100644 index 000000000..f60af4b18 --- /dev/null +++ b/devops/gc/deploy/templates/rate-limit.yaml @@ -0,0 +1,43 @@ +{{- if and (eq .Values.global.tier "PROD") (eq .Values.global.autoscaling true) }} +apiVersion: networking.istio.io/v1alpha3 +kind: EnvoyFilter +metadata: + name: {{ printf "%s-ratelimit" .Values.conf.appName | quote }} + namespace: {{ .Release.Namespace | quote }} +spec: + workloadSelector: # restricts the filter to workloads labelled app=conf.appName + labels: + app: {{ .Values.conf.appName | quote }} + configPatches: + - applyTo: "HTTP_FILTER" + match: + context: "SIDECAR_INBOUND" + listener: + filterChain: + filter: + name: "envoy.filters.network.http_connection_manager" + patch: # adds the local rate limit HTTP filter to the matched filter chain + operation: INSERT_BEFORE + value: + name: "envoy.filters.http.local_ratelimit" + typed_config: + "@type": "type.googleapis.com/udpa.type.v1.TypedStruct" + type_url: "type.googleapis.com/envoy.extensions.filters.http.local_ratelimit.v3.LocalRateLimit" + value: + stat_prefix: "http_local_rate_limiter" + enable_x_ratelimit_headers: "DRAFT_VERSION_03" + token_bucket: # bucket sizing is read from .Values.local_ratelimit + max_tokens: {{ .Values.local_ratelimit.max_tokens }} + tokens_per_fill: {{ .Values.local_ratelimit.tokens_per_fill }} + fill_interval: {{ .Values.local_ratelimit.fill_interval | quote }} + filter_enabled: # default fraction is 100 of HUNDRED + runtime_key: "local_rate_limit_enabled" + default_value: + numerator: 100 + denominator: "HUNDRED" + filter_enforced: # default fraction is 100 of HUNDRED + runtime_key: "local_rate_limit_enforced" + default_value: + numerator: 100 + denominator: "HUNDRED" +{{- end }} diff --git
a/devops/gc/deploy/values.yaml b/devops/gc/deploy/values.yaml index 66b8dcaa9..63858c0b5 100644 --- a/devops/gc/deploy/values.yaml +++ b/devops/gc/deploy/values.yaml @@ -4,6 +4,8 @@ global: limitsEnabled: true dataPartitionId: "" logLevel: "ERROR" + tier: "" + autoscaling: true data: #Configmaps @@ -41,3 +43,20 @@ istio: proxyMemoryLimit: "512Mi" bootstrapProxyCPU: "10m" bootstrapProxyCPULimit: "100m" + +hpa: # used only when global.tier is "PROD" and global.autoscaling is true + minReplicas: 6 + maxReplicas: 15 + targetType: "AverageValue" + targetValue: 120 # rps*0.85*2 (presumably rps is local_ratelimit.max_tokens, 70*0.85*2 rounded up; TODO confirm) + behaviorScaleUpStabilizationWindowSeconds: 10 + behaviorScaleUpPoliciesValue: 50 + behaviorScaleUpPoliciesPeriodSeconds: 15 + behaviorScaleDownStabilizationWindowSeconds: 60 + behaviorScaleDownPoliciesValue: 25 + behaviorScaleDownPoliciesPeriodSeconds: 60 + +local_ratelimit: # token bucket settings, used under the same tier/autoscaling condition as hpa + max_tokens: 70 # rps (requests per second with the default fill_interval of 1s) + tokens_per_fill: 70 + fill_interval: "1s" -- GitLab