Gonrg 10265 bring autoscaling

5b69b322 · Siarhei Poliak [EPAM / GCP] · Oleksandr Kosse (EPAM) · 1a5ab4f9 · 5b69b322 · 5b69b322
Commit 5b69b322 authored 5 months ago by Siarhei Poliak [EPAM / GCP] Committed by Oleksandr Kosse (EPAM) 5 months ago
--- a/devops/gc/deploy/README.md
+++ b/devops/gc/deploy/README.md
@@ -36,6 +36,8 @@ First you need to set variables in **values.yaml** file using any code editor. S
 **global.limitsEnabled** | whether CPU and memory limits are enabled | boolean | `true` | yes
 **global.dataPartitionId** | data partition id | string | - | yes
 **global.logLevel** | severity of logging level | string | `ERROR` | yes
+**global.tier** | deployment tier; autoscaling is enabled only when this is set to `PROD` | string | `""` | no
+**global.autoscaling** | enables horizontal pod autoscaling and rate limits when `global.tier` is `PROD` | boolean | `true` | yes

 ### Common variables

@@ -102,6 +104,29 @@ First you need to set variables in **values.yaml** file using any code editor. S
 **istio.bootstrapProxyCPU** | CPU request for Envoy sidecars | string | `10m` | yes
 **istio.bootstrapProxyCPULimit** | CPU limit for Envoy sidecars | string | `100m` | yes

+### Horizontal Pod Autoscaling (HPA) variables (used only if `global.tier` is `PROD` and `global.autoscaling` is `true`)
+
+| Name | Description | Type | Default |Required |
+|------|-------------|------|---------|---------|
+**hpa.minReplicas** | minimum number of replicas | integer | 6 | only if `global.autoscaling` is true and `global.tier` is PROD
+**hpa.maxReplicas** | maximum number of replicas | integer | 15 | only if `global.autoscaling` is true and `global.tier` is PROD
+**hpa.targetType** | type of measurements: AverageValue or Value | string | "AverageValue" | only if `global.autoscaling` is true and `global.tier` is PROD
+**hpa.targetValue** | threshold value to trigger the scaling up | integer | 200 | only if `global.autoscaling` is true and `global.tier` is PROD
+**hpa.behaviorScaleUpStabilizationWindowSeconds** | stabilization window, in seconds, applied before a triggered scale up takes effect | integer | 10 | only if `global.autoscaling` is true and `global.tier` is PROD
+**hpa.behaviorScaleUpPoliciesValue** | maximum number of replicas to add per period, as a percentage of the current replica count | integer | 50 | only if `global.autoscaling` is true and `global.tier` is PROD
+**hpa.behaviorScaleUpPoliciesPeriodSeconds** | length, in seconds, of the period over which the scale-up policy applies | integer | 15 | only if `global.autoscaling` is true and `global.tier` is PROD
+**hpa.behaviorScaleDownStabilizationWindowSeconds** | stabilization window, in seconds, applied before a triggered scale down takes effect | integer | 60 | only if `global.autoscaling` is true and `global.tier` is PROD
+**hpa.behaviorScaleDownPoliciesValue** | maximum number of replicas to remove per period, as a percentage of the current replica count | integer | 25 | only if `global.autoscaling` is true and `global.tier` is PROD
+**hpa.behaviorScaleDownPoliciesPeriodSeconds** | length, in seconds, of the period over which the scale-down policy applies | integer | 60 | only if `global.autoscaling` is true and `global.tier` is PROD
+
+### Limits variables
+
+| Name | Description | Type | Default |Required |
+|------|-------------|------|---------|---------|
+**limits.maxTokens** | maximum number of requests per fillInterval | integer | 120 | only if `global.autoscaling` is true and `global.tier` is PROD
+**limits.tokensPerFill** | number of new tokens allowed every fillInterval | integer | 120 | only if `global.autoscaling` is true and `global.tier` is PROD
+**limits.fillInterval** | interval at which new tokens are added to the bucket | string | "1s" | only if `global.autoscaling` is true and `global.tier` is PROD
+
 ### Install the helm chart

 Run this command from within this directory:

--- a/devops/gc/deploy/templates/opa-hpa.yaml
+++ b/devops/gc/deploy/templates/opa-hpa.yaml
+{{- /*
+HorizontalPodAutoscaler for the Deployment named by opa.conf.appName.
+Rendered only when global.tier is "PROD" and global.autoscaling is true.
+Scales on the external metric istio.io|service|server|request_count,
+filtered to the workload whose name equals opa.conf.appName.
+hpa.targetType must be "AverageValue" or "Value"; any other value aborts
+rendering instead of emitting a target without a quantity.
+*/ -}}
+{{- if and (eq .Values.global.tier "PROD") (eq .Values.global.autoscaling true) }}
+apiVersion: autoscaling/v2
+kind: HorizontalPodAutoscaler
+metadata:
+  name: {{ printf "%s-hpa" .Values.opa.conf.appName | quote }}
+  namespace: {{ .Release.Namespace | quote }}
+spec:
+  scaleTargetRef:
+    apiVersion: apps/v1
+    kind: Deployment
+    name: {{ .Values.opa.conf.appName | quote }}
+  minReplicas: {{ .Values.hpa.minReplicas }}
+  maxReplicas: {{ .Values.hpa.maxReplicas }}
+  metrics:
+    - type: External
+      external:
+        metric:
+          name: istio.io|service|server|request_count
+          selector:
+            matchLabels:
+              metric.labels.destination_workload_name: {{ .Values.opa.conf.appName | quote }}
+        target:
+          type: {{ .Values.hpa.targetType | quote }}
+          {{- /* The quantity key must match the target type. */}}
+          {{- if eq .Values.hpa.targetType "AverageValue" }}
+          averageValue: {{ .Values.hpa.targetValue }}
+          {{- else if eq .Values.hpa.targetType "Value" }}
+          value: {{ .Values.hpa.targetValue }}
+          {{- else }}
+          {{- fail (printf "hpa.targetType must be \"AverageValue\" or \"Value\", got %q" .Values.hpa.targetType) }}
+          {{- end }}
+  behavior:
+    scaleUp:
+      stabilizationWindowSeconds: {{ .Values.hpa.behaviorScaleUpStabilizationWindowSeconds }}
+      policies:
+        - type: Percent
+          value: {{ .Values.hpa.behaviorScaleUpPoliciesValue }}
+          periodSeconds: {{ .Values.hpa.behaviorScaleUpPoliciesPeriodSeconds }}
+    scaleDown:
+      stabilizationWindowSeconds: {{ .Values.hpa.behaviorScaleDownStabilizationWindowSeconds }}
+      policies:
+        - type: Percent
+          value: {{ .Values.hpa.behaviorScaleDownPoliciesValue }}
+          periodSeconds: {{ .Values.hpa.behaviorScaleDownPoliciesPeriodSeconds }}
+{{- end }}
--- a/devops/gc/deploy/templates/opa-rate-limits.yaml
+++ b/devops/gc/deploy/templates/opa-rate-limits.yaml
+{{- /*
+Istio EnvoyFilter that inserts the envoy.filters.http.local_ratelimit HTTP
+filter in the SIDECAR_INBOUND context for workloads labelled
+app=<opa.conf.appName>.
+Rendered only when global.tier is "PROD" and global.autoscaling is true.
+Token bucket settings are read from the `limits` values.
+*/ -}}
+{{- $tier := .Values.global.tier }}
+{{- $autoscaling := .Values.global.autoscaling }}
+{{- if and (eq $tier "PROD") (eq $autoscaling true) }}
+{{- $app := .Values.opa.conf.appName }}
+{{- $bucket := .Values.limits }}
+apiVersion: networking.istio.io/v1alpha3
+kind: EnvoyFilter
+metadata:
+  name: {{ printf "%s-ratelimit" $app | quote }}
+  namespace: {{ .Release.Namespace | quote }}
+spec:
+  workloadSelector:
+    labels:
+      app: {{ $app | quote }}
+  configPatches:
+    - applyTo: "HTTP_FILTER"
+      match:
+        context: "SIDECAR_INBOUND"
+        listener:
+          filterChain:
+            filter:
+              name: "envoy.filters.network.http_connection_manager"
+      patch:
+        operation: INSERT_BEFORE
+        value:
+          name: "envoy.filters.http.local_ratelimit"
+          typed_config:
+            '@type': "type.googleapis.com/udpa.type.v1.TypedStruct"
+            type_url: "type.googleapis.com/envoy.extensions.filters.http.local_ratelimit.v3.LocalRateLimit"
+            value:
+              stat_prefix: "http_local_rate_limiter"
+              enable_x_ratelimit_headers: "DRAFT_VERSION_03"
+              token_bucket:
+                max_tokens: {{ $bucket.maxTokens }}
+                tokens_per_fill: {{ $bucket.tokensPerFill }}
+                fill_interval: {{ $bucket.fillInterval | quote }}
+              {{- /* Both fractions are 100/HUNDRED. */}}
+              filter_enabled:
+                runtime_key: "local_rate_limit_enabled"
+                default_value:
+                  numerator: 100
+                  denominator: "HUNDRED"
+              filter_enforced:
+                runtime_key: "local_rate_limit_enforced"
+                default_value:
+                  numerator: 100
+                  denominator: "HUNDRED"
+{{- end }}
--- a/devops/gc/deploy/templates/policy-hpa.yaml
+++ b/devops/gc/deploy/templates/policy-hpa.yaml
+{{- /*
+HorizontalPodAutoscaler for the Deployment named by conf.appName.
+Rendered only when global.tier is "PROD" and global.autoscaling is true.
+Scales on the external metric istio.io|service|server|request_count,
+filtered to the workload whose name equals conf.appName.
+hpa.targetType must be "AverageValue" or "Value"; any other value aborts
+rendering instead of emitting a target without a quantity.
+*/ -}}
+{{- if and (eq .Values.global.tier "PROD") (eq .Values.global.autoscaling true) }}
+apiVersion: autoscaling/v2
+kind: HorizontalPodAutoscaler
+metadata:
+  name: {{ printf "%s-hpa" .Values.conf.appName | quote }}
+  namespace: {{ .Release.Namespace | quote }}
+spec:
+  scaleTargetRef:
+    apiVersion: apps/v1
+    kind: Deployment
+    name: {{ .Values.conf.appName | quote }}
+  minReplicas: {{ .Values.hpa.minReplicas }}
+  maxReplicas: {{ .Values.hpa.maxReplicas }}
+  metrics:
+    - type: External
+      external:
+        metric:
+          name: istio.io|service|server|request_count
+          selector:
+            matchLabels:
+              metric.labels.destination_workload_name: {{ .Values.conf.appName | quote }}
+        target:
+          type: {{ .Values.hpa.targetType | quote }}
+          {{- /* The quantity key must match the target type. */}}
+          {{- if eq .Values.hpa.targetType "AverageValue" }}
+          averageValue: {{ .Values.hpa.targetValue }}
+          {{- else if eq .Values.hpa.targetType "Value" }}
+          value: {{ .Values.hpa.targetValue }}
+          {{- else }}
+          {{- fail (printf "hpa.targetType must be \"AverageValue\" or \"Value\", got %q" .Values.hpa.targetType) }}
+          {{- end }}
+  behavior:
+    scaleUp:
+      stabilizationWindowSeconds: {{ .Values.hpa.behaviorScaleUpStabilizationWindowSeconds }}
+      policies:
+        - type: Percent
+          value: {{ .Values.hpa.behaviorScaleUpPoliciesValue }}
+          periodSeconds: {{ .Values.hpa.behaviorScaleUpPoliciesPeriodSeconds }}
+    scaleDown:
+      stabilizationWindowSeconds: {{ .Values.hpa.behaviorScaleDownStabilizationWindowSeconds }}
+      policies:
+        - type: Percent
+          value: {{ .Values.hpa.behaviorScaleDownPoliciesValue }}
+          periodSeconds: {{ .Values.hpa.behaviorScaleDownPoliciesPeriodSeconds }}
+{{- end }}
--- a/devops/gc/deploy/templates/policy-rate-limits.yaml
+++ b/devops/gc/deploy/templates/policy-rate-limits.yaml
+{{- /*
+Istio EnvoyFilter that inserts the envoy.filters.http.local_ratelimit HTTP
+filter in the SIDECAR_INBOUND context for workloads labelled
+app=<conf.appName>.
+Rendered only when global.tier is "PROD" and global.autoscaling is true.
+Token bucket settings are read from the `limits` values.
+*/ -}}
+{{- $tier := .Values.global.tier }}
+{{- $autoscaling := .Values.global.autoscaling }}
+{{- if and (eq $tier "PROD") (eq $autoscaling true) }}
+{{- $app := .Values.conf.appName }}
+{{- $bucket := .Values.limits }}
+apiVersion: networking.istio.io/v1alpha3
+kind: EnvoyFilter
+metadata:
+  name: {{ printf "%s-ratelimit" $app | quote }}
+  namespace: {{ .Release.Namespace | quote }}
+spec:
+  workloadSelector:
+    labels:
+      app: {{ $app | quote }}
+  configPatches:
+    - applyTo: "HTTP_FILTER"
+      match:
+        context: "SIDECAR_INBOUND"
+        listener:
+          filterChain:
+            filter:
+              name: "envoy.filters.network.http_connection_manager"
+      patch:
+        operation: INSERT_BEFORE
+        value:
+          name: "envoy.filters.http.local_ratelimit"
+          typed_config:
+            '@type': "type.googleapis.com/udpa.type.v1.TypedStruct"
+            type_url: "type.googleapis.com/envoy.extensions.filters.http.local_ratelimit.v3.LocalRateLimit"
+            value:
+              stat_prefix: "http_local_rate_limiter"
+              enable_x_ratelimit_headers: "DRAFT_VERSION_03"
+              token_bucket:
+                max_tokens: {{ $bucket.maxTokens }}
+                tokens_per_fill: {{ $bucket.tokensPerFill }}
+                fill_interval: {{ $bucket.fillInterval | quote }}
+              {{- /* Both fractions are 100/HUNDRED. */}}
+              filter_enabled:
+                runtime_key: "local_rate_limit_enabled"
+                default_value:
+                  numerator: 100
+                  denominator: "HUNDRED"
+              filter_enforced:
+                runtime_key: "local_rate_limit_enforced"
+                default_value:
+                  numerator: 100
+                  denominator: "HUNDRED"
+{{- end }}
--- a/devops/gc/deploy/values.yaml
+++ b/devops/gc/deploy/values.yaml
@@ -5,6 +5,8 @@ global:
  limitsEnabled: true
  dataPartitionId: ""
  logLevel: "ERROR"
+  tier: ""  # the HPA and rate-limit templates render only when this equals "PROD"
+  autoscaling: true  # must also be true for the HPA and rate-limit templates to render

 data:
  # Deployment resources
@@ -53,3 +55,20 @@ istio:
  proxyMemoryLimit: 512Mi
  bootstrapProxyCPU: 5m
  bootstrapProxyCPULimit: 100m
+
+hpa:
+  minReplicas: 6  # rendered as spec.minReplicas of both HPAs
+  maxReplicas: 15  # rendered as spec.maxReplicas of both HPAs
+  targetType: "AverageValue"  # the HPA templates handle "AverageValue" and "Value"
+  targetValue: 200  # 100 rps -- NOTE(review): how 200 maps to 100 rps is not shown here; confirm
+  behaviorScaleUpStabilizationWindowSeconds: 10  # behavior.scaleUp.stabilizationWindowSeconds
+  behaviorScaleUpPoliciesValue: 50  # scaleUp policy value; the policy type is Percent
+  behaviorScaleUpPoliciesPeriodSeconds: 15  # scaleUp policy periodSeconds
+  behaviorScaleDownStabilizationWindowSeconds: 60  # behavior.scaleDown.stabilizationWindowSeconds
+  behaviorScaleDownPoliciesValue: 25  # scaleDown policy value; the policy type is Percent
+  behaviorScaleDownPoliciesPeriodSeconds: 60  # scaleDown policy periodSeconds
+
+limits:
+  maxTokens: 120  # token_bucket.max_tokens in the rate-limit EnvoyFilters
+  tokensPerFill: 120  # token_bucket.tokens_per_fill in the rate-limit EnvoyFilters
+  fillInterval: "1s"  # token_bucket.fill_interval in the rate-limit EnvoyFilters