From b6717db03813a55967d19267b06ee2c2593cc6d2 Mon Sep 17 00:00:00 2001
From: "Yauheni  Rykhter (EPAM)" <yauheni_rykhter@epam.com>
Date: Tue, 18 Feb 2025 18:20:29 +0000
Subject: [PATCH] Gonrg 10592 indexer autoscaling

---
 devops/gc/deploy/README.md          | 22 +++++-----
 devops/gc/deploy/templates/hpa.yaml | 62 +++++++++++++++++++++++------
 devops/gc/deploy/values.yaml        | 18 +++++----
 3 files changed, 74 insertions(+), 28 deletions(-)

diff --git a/devops/gc/deploy/README.md b/devops/gc/deploy/README.md
index a49f0b569..711c492e3 100644
--- a/devops/gc/deploy/README.md
+++ b/devops/gc/deploy/README.md
@@ -84,16 +84,20 @@ First you need to set variables in **values.yaml** file using any code editor. S
 
 | Name                                                | Description                                                                   | Type    | Default        | Required                                                       |
 |-----------------------------------------------------|-------------------------------------------------------------------------------|---------|----------------|----------------------------------------------------------------|
-| **hpa.minReplicas**                                 | minimum number of replicas                                                    | integer | 6              | only if `global.autoscaling` is true and `global.tier` is PROD |
-| **hpa.maxReplicas**                                 | maximum number of replicas                                                    | integer | 15             | only if `global.autoscaling` is true and `global.tier` is PROD |
-| **hpa.targetType**                                  | type of measurements: AverageValue or Value                                   | string  | "AverageValue" | only if `global.autoscaling` is true and `global.tier` is PROD |
-| **hpa.targetValue**                                 | threshold value to trigger the scaling up                                     | integer | 80             | only if `global.autoscaling` is true and `global.tier` is PROD |
-| **hpa.behaviorScaleUpStabilizationWindowSeconds**   | time to start implementing the scale up when it is triggered                  | integer | 10             | only if `global.autoscaling` is true and `global.tier` is PROD |
-| **hpa.behaviorScaleUpPoliciesValue**                | the maximum number of new replicas to create (in percents from current state) | integer | 50             | only if `global.autoscaling` is true and `global.tier` is PROD |
-| **hpa.behaviorScaleUpPoliciesPeriodSeconds**        | pause for every new scale up decision                                         | integer | 15             | only if `global.autoscaling` is true and `global.tier` is PROD |
+| **hpa.minReplicas**  | minimum number of replicas | integer | 6 | only if `global.autoscaling` is true and `global.tier` is PROD |
+| **hpa.maxReplicas**  | maximum number of replicas | integer | 15 | only if `global.autoscaling` is true and `global.tier` is PROD |
+| **hpa.pubsubTargetType** | type of measurements: AverageValue | string | "AverageValue" | only if `global.autoscaling` is true and `global.tier` is PROD |
+| **hpa.pubsubTargetValue**   | threshold value to trigger the scaling up | integer | 50 | only if `global.autoscaling` is true and `global.tier` is PROD |
+| **hpa.cpuTargetType**   | type of measurements: Utilization | string | "Utilization" | only if `global.autoscaling` is true and `global.tier` is PROD |
+| **hpa.cpuTargetValue**  | threshold value to trigger the scaling up | integer | 70 | only if `global.autoscaling` is true and `global.tier` is PROD |
+| **hpa.memoryTargetType** | type of measurements: Utilization | string | "Utilization" | only if `global.autoscaling` is true and `global.tier` is PROD |
+| **hpa.memoryTargetValue** | threshold value to trigger the scaling up | integer | 70 | only if `global.autoscaling` is true and `global.tier` is PROD |
+| **hpa.behaviorScaleUpStabilizationWindowSeconds**   | time to start implementing the scale up when it is triggered                  | integer | 60             | only if `global.autoscaling` is true and `global.tier` is PROD |
+| **hpa.behaviorScaleUpPoliciesValue**                | the maximum number of new replicas to create per period (absolute number of pods) | integer | 1             | only if `global.autoscaling` is true and `global.tier` is PROD |
+| **hpa.behaviorScaleUpPoliciesPeriodSeconds**        | pause for every new scale up decision                                         | integer | 120             | only if `global.autoscaling` is true and `global.tier` is PROD |
 | **hpa.behaviorScaleDownStabilizationWindowSeconds** | time to start implementing the scale down when it is triggered                | integer | 60             | only if `global.autoscaling` is true and `global.tier` is PROD |
-| **hpa.behaviorScaleDownPoliciesValue**              | the maximum number of replicas to destroy (in percents from current state)    | integer | 25             | only if `global.autoscaling` is true and `global.tier` is PROD |
-| **hpa.behaviorScaleDownPoliciesPeriodSeconds**      | pause for every new scale down decision                                       | integer | 60             | only if `global.autoscaling` is true and `global.tier` is PROD |
+| **hpa.behaviorScaleDownPoliciesValue**              | the maximum number of replicas to destroy per period (absolute number of pods) | integer | 1             | only if `global.autoscaling` is true and `global.tier` is PROD |
+| **hpa.behaviorScaleDownPoliciesPeriodSeconds**      | pause for every new scale down decision                                       | integer | 120             | only if `global.autoscaling` is true and `global.tier` is PROD |
 
 ### Limits variables
 
diff --git a/devops/gc/deploy/templates/hpa.yaml b/devops/gc/deploy/templates/hpa.yaml
index 3cda953f2..0dca0a867 100644
--- a/devops/gc/deploy/templates/hpa.yaml
+++ b/devops/gc/deploy/templates/hpa.yaml
@@ -12,31 +12,69 @@ spec:
   minReplicas: {{ .Values.hpa.minReplicas }}
   maxReplicas: {{ .Values.hpa.maxReplicas }}
   metrics:
-    - type: External
-      external:
+    - external:  # one External metric per Pub/Sub subscription consumed by the indexer
         metric:
-          name: istio.io|service|server|request_count
+          name: pubsub.googleapis.com|subscription|num_undelivered_messages
           selector:
             matchLabels:
-              metric.labels.destination_workload_name: {{ .Values.conf.appName | quote }}
+              resource.labels.subscription_id: indexer-records-changed
         target:
-          type: {{ .Values.hpa.targetType | quote }}
-          {{- if eq .Values.hpa.targetType "AverageValue" }}
-          averageValue: {{ .Values.hpa.targetValue }}
-          {{- else if eq .Values.hpa.targetType "Value" }}
-          value: {{ .Values.hpa.targetValue }}
-          {{- end }}
+          type: {{ .Values.hpa.pubsubTargetType | quote }}  # key below is "value" for type Value, else "averageValue"
+          {{ if eq .Values.hpa.pubsubTargetType "Value" }}value{{ else }}averageValue{{ end }}: {{ .Values.hpa.pubsubTargetValue }}
+      type: External
+    - external:
+        metric:
+          name: pubsub.googleapis.com|subscription|num_undelivered_messages
+          selector:
+            matchLabels:
+              resource.labels.subscription_id: indexer-reindex
+        target:
+          type: {{ .Values.hpa.pubsubTargetType | quote }}
+          {{ if eq .Values.hpa.pubsubTargetType "Value" }}value{{ else }}averageValue{{ end }}: {{ .Values.hpa.pubsubTargetValue }}
+      type: External
+    - external:
+        metric:
+          name: pubsub.googleapis.com|subscription|num_undelivered_messages
+          selector:
+            matchLabels:
+              resource.labels.subscription_id: indexer-reprocess
+        target:
+          type: {{ .Values.hpa.pubsubTargetType | quote }}
+          {{ if eq .Values.hpa.pubsubTargetType "Value" }}value{{ else }}averageValue{{ end }}: {{ .Values.hpa.pubsubTargetValue }}
+      type: External
+    - external:
+        metric:
+          name: pubsub.googleapis.com|subscription|num_undelivered_messages
+          selector:
+            matchLabels:
+              resource.labels.subscription_id: indexer-schema-changed
+        target:
+          type: {{ .Values.hpa.pubsubTargetType | quote }}
+          {{ if eq .Values.hpa.pubsubTargetType "Value" }}value{{ else }}averageValue{{ end }}: {{ .Values.hpa.pubsubTargetValue }}
+      type: External
+    - resource:  # pod cpu usage
+        name: cpu
+        target:
+          type: {{ .Values.hpa.cpuTargetType | quote }}  # key below is "averageValue" for type AverageValue, else "averageUtilization"
+          {{ if eq .Values.hpa.cpuTargetType "AverageValue" }}averageValue{{ else }}averageUtilization{{ end }}: {{ .Values.hpa.cpuTargetValue }}
+      type: Resource
+    - resource:  # pod memory usage
+        name: memory
+        target:
+          type: {{ .Values.hpa.memoryTargetType | quote }}
+          {{ if eq .Values.hpa.memoryTargetType "AverageValue" }}averageValue{{ else }}averageUtilization{{ end }}: {{ .Values.hpa.memoryTargetValue }}
+      type: Resource
   behavior:
     scaleUp:
       stabilizationWindowSeconds: {{ .Values.hpa.behaviorScaleUpStabilizationWindowSeconds }}
       policies:
-        - type: Percent
+        - type: Pods  # value is an absolute number of pods added per periodSeconds
           value: {{ .Values.hpa.behaviorScaleUpPoliciesValue }}
           periodSeconds: {{ .Values.hpa.behaviorScaleUpPoliciesPeriodSeconds }}
     scaleDown:
       stabilizationWindowSeconds: {{ .Values.hpa.behaviorScaleDownStabilizationWindowSeconds }}
       policies:
-        - type: Percent
+        - type: Pods  # value is an absolute number of pods removed per periodSeconds
           value: {{ .Values.hpa.behaviorScaleDownPoliciesValue }}
           periodSeconds: {{ .Values.hpa.behaviorScaleDownPoliciesPeriodSeconds }}
 {{- end }}
diff --git a/devops/gc/deploy/values.yaml b/devops/gc/deploy/values.yaml
index a0bd52703..e0865d492 100644
--- a/devops/gc/deploy/values.yaml
+++ b/devops/gc/deploy/values.yaml
@@ -47,14 +47,18 @@ istio:
 hpa:
   minReplicas: 6
   maxReplicas: 15
-  targetType: "AverageValue"
-  targetValue: 340 #200rps*0.85.*2
-  behaviorScaleUpStabilizationWindowSeconds: 10
-  behaviorScaleUpPoliciesValue: 50
-  behaviorScaleUpPoliciesPeriodSeconds: 15
+  pubsubTargetType: AverageValue  # target type shared by all Pub/Sub External metrics in templates/hpa.yaml
+  pubsubTargetValue: 50  # threshold applied to each Pub/Sub subscription metric
+  cpuTargetType: Utilization  # target type of the cpu Resource metric
+  cpuTargetValue: 70  # threshold for the cpu Resource metric
+  memoryTargetType: Utilization  # target type of the memory Resource metric
+  memoryTargetValue: 70  # threshold for the memory Resource metric
+  behaviorScaleUpStabilizationWindowSeconds: 60  # scaleUp.stabilizationWindowSeconds
+  behaviorScaleUpPoliciesValue: 1  # number of pods: the scaleUp policy type is Pods
+  behaviorScaleUpPoliciesPeriodSeconds: 120  # periodSeconds of the scaleUp policy
   behaviorScaleDownStabilizationWindowSeconds: 60
-  behaviorScaleDownPoliciesValue: 25
-  behaviorScaleDownPoliciesPeriodSeconds: 60
+  behaviorScaleDownPoliciesValue: 1  # number of pods: the scaleDown policy type is Pods
+  behaviorScaleDownPoliciesPeriodSeconds: 120  # periodSeconds of the scaleDown policy
 
 limits:
   maxTokens: 200 #rps
-- 
GitLab