From 3781249e5f598e15dccc233bc531dd960c6b917b Mon Sep 17 00:00:00 2001
From: Yauheni Rykhter <yauheni_rykhter@epam.com>
Date: Tue, 18 Feb 2025 10:23:08 +0400
Subject: [PATCH 1/2] GONRG-10592: indexer autoscaling

---
 devops/gc/deploy/templates/hpa.yaml | 62 +++++++++++++++++++++++------
 devops/gc/deploy/values.yaml        | 18 +++++----
 2 files changed, 61 insertions(+), 19 deletions(-)

diff --git a/devops/gc/deploy/templates/hpa.yaml b/devops/gc/deploy/templates/hpa.yaml
index 3cda953f2..0dca0a867 100644
--- a/devops/gc/deploy/templates/hpa.yaml
+++ b/devops/gc/deploy/templates/hpa.yaml
@@ -12,31 +12,69 @@ spec:
   minReplicas: {{ .Values.hpa.minReplicas }}
   maxReplicas: {{ .Values.hpa.maxReplicas }}
   metrics:
-    - type: External
-      external:
+    - external:  # undelivered messages of indexer-records-changed; target key is fixed to averageValue, so hpa.pubsubTargetType must be "AverageValue"
         metric:
-          name: istio.io|service|server|request_count
+          name: pubsub.googleapis.com|subscription|num_undelivered_messages
           selector:
             matchLabels:
-              metric.labels.destination_workload_name: {{ .Values.conf.appName | quote }}
+              resource.labels.subscription_id: indexer-records-changed
         target:
-          type: {{ .Values.hpa.targetType | quote }}
-          {{- if eq .Values.hpa.targetType "AverageValue" }}
-          averageValue: {{ .Values.hpa.targetValue }}
-          {{- else if eq .Values.hpa.targetType "Value" }}
-          value: {{ .Values.hpa.targetValue }}
-          {{- end }}
+          type: {{ .Values.hpa.pubsubTargetType | quote }}
+          averageValue: {{ .Values.hpa.pubsubTargetValue }}
+      type: External
+    - external:  # undelivered messages of indexer-reindex; same averageValue-only target
+        metric:
+          name: pubsub.googleapis.com|subscription|num_undelivered_messages
+          selector:
+            matchLabels:
+              resource.labels.subscription_id: indexer-reindex
+        target:
+          type: {{ .Values.hpa.pubsubTargetType | quote }}
+          averageValue: {{ .Values.hpa.pubsubTargetValue }}
+      type: External
+    - external:  # undelivered messages of indexer-reprocess; same averageValue-only target
+        metric:
+          name: pubsub.googleapis.com|subscription|num_undelivered_messages
+          selector:
+            matchLabels:
+              resource.labels.subscription_id: indexer-reprocess
+        target:
+          type: {{ .Values.hpa.pubsubTargetType | quote }}
+          averageValue: {{ .Values.hpa.pubsubTargetValue }}
+      type: External
+    - external:  # indexer-schema-changed subscription; target key is fixed to averageValue
+        metric:
+          name: pubsub.googleapis.com|subscription|num_undelivered_messages  # undelivered-message count, the same metric the other subscriptions scale on
+          selector:
+            matchLabels:
+              resource.labels.subscription_id: indexer-schema-changed
+        target:
+          type: {{ .Values.hpa.pubsubTargetType | quote }}
+          averageValue: {{ .Values.hpa.pubsubTargetValue }}
+      type: External
+    - resource:  # cpu metric; target key is fixed to averageUtilization, so hpa.cpuTargetType must be "Utilization"
+        name: cpu
+        target:
+          type: {{ .Values.hpa.cpuTargetType | quote }}
+          averageUtilization: {{ .Values.hpa.cpuTargetValue }}
+      type: Resource
+    - resource:  # memory metric; target key is fixed to averageUtilization, so hpa.memoryTargetType must be "Utilization"
+        name: memory
+        target:
+          type: {{ .Values.hpa.memoryTargetType | quote }}
+          averageUtilization: {{ .Values.hpa.memoryTargetValue }}
+      type: Resource
   behavior:
     scaleUp:
       stabilizationWindowSeconds: {{ .Values.hpa.behaviorScaleUpStabilizationWindowSeconds }}
       policies:
-        - type: Percent
+        - type: Pods  # policy value below is a number of pods, not a percentage
           value: {{ .Values.hpa.behaviorScaleUpPoliciesValue }}
           periodSeconds: {{ .Values.hpa.behaviorScaleUpPoliciesPeriodSeconds }}
     scaleDown:
       stabilizationWindowSeconds: {{ .Values.hpa.behaviorScaleDownStabilizationWindowSeconds }}
       policies:
-        - type: Percent
+        - type: Pods  # policy value below is a number of pods, not a percentage
           value: {{ .Values.hpa.behaviorScaleDownPoliciesValue }}
           periodSeconds: {{ .Values.hpa.behaviorScaleDownPoliciesPeriodSeconds }}
 {{- end }}
diff --git a/devops/gc/deploy/values.yaml b/devops/gc/deploy/values.yaml
index 57d356d3a..8ebb9ef0b 100644
--- a/devops/gc/deploy/values.yaml
+++ b/devops/gc/deploy/values.yaml
@@ -48,14 +48,18 @@ istio:
 hpa:
   minReplicas: 6
   maxReplicas: 15
-  targetType: "AverageValue"
-  targetValue: 340 #200rps*0.85.*2
-  behaviorScaleUpStabilizationWindowSeconds: 10
-  behaviorScaleUpPoliciesValue: 50
-  behaviorScaleUpPoliciesPeriodSeconds: 15
+  pubsubTargetType: AverageValue  # target type of the Pub/Sub external metrics (template emits averageValue only)
+  pubsubTargetValue: 50  # averageValue target applied to every Pub/Sub subscription metric
+  cpuTargetType: Utilization  # target type of the cpu resource metric (template emits averageUtilization only)
+  cpuTargetValue: 70  # averageUtilization target for cpu
+  memoryTargetType: Utilization  # target type of the memory resource metric (template emits averageUtilization only)
+  memoryTargetValue: 70  # averageUtilization target for memory
+  behaviorScaleUpStabilizationWindowSeconds: 60
+  behaviorScaleUpPoliciesValue: 1  # value of the `Pods` scale-up policy
+  behaviorScaleUpPoliciesPeriodSeconds: 120
   behaviorScaleDownStabilizationWindowSeconds: 60
-  behaviorScaleDownPoliciesValue: 25
-  behaviorScaleDownPoliciesPeriodSeconds: 60
+  behaviorScaleDownPoliciesValue: 1  # value of the `Pods` scale-down policy
+  behaviorScaleDownPoliciesPeriodSeconds: 120
 
 limits:
   maxTokens: 200 #rps
-- 
GitLab


From dd9a1cb92d314f68bc969d3915ec7c3d55e20c41 Mon Sep 17 00:00:00 2001
From: Yauheni Rykhter <yauheni_rykhter@epam.com>
Date: Tue, 18 Feb 2025 10:31:44 +0400
Subject: [PATCH 2/2] GONRG-10592: indexer autoscaling

---
 devops/gc/deploy/README.md | 22 +++++++++++++---------
 1 file changed, 13 insertions(+), 9 deletions(-)

diff --git a/devops/gc/deploy/README.md b/devops/gc/deploy/README.md
index c4562497c..879cafbaa 100644
--- a/devops/gc/deploy/README.md
+++ b/devops/gc/deploy/README.md
@@ -85,16 +85,20 @@ First you need to set variables in **values.yaml** file using any code editor. S
 
 | Name                                                | Description                                                                   | Type    | Default        | Required                                                       |
 |-----------------------------------------------------|-------------------------------------------------------------------------------|---------|----------------|----------------------------------------------------------------|
-| **hpa.minReplicas**                                 | minimum number of replicas                                                    | integer | 6              | only if `global.autoscaling` is true and `global.tier` is PROD |
-| **hpa.maxReplicas**                                 | maximum number of replicas                                                    | integer | 15             | only if `global.autoscaling` is true and `global.tier` is PROD |
-| **hpa.targetType**                                  | type of measurements: AverageValue or Value                                   | string  | "AverageValue" | only if `global.autoscaling` is true and `global.tier` is PROD |
-| **hpa.targetValue**                                 | threshold value to trigger the scaling up                                     | integer | 80             | only if `global.autoscaling` is true and `global.tier` is PROD |
-| **hpa.behaviorScaleUpStabilizationWindowSeconds**   | time to start implementing the scale up when it is triggered                  | integer | 10             | only if `global.autoscaling` is true and `global.tier` is PROD |
-| **hpa.behaviorScaleUpPoliciesValue**                | the maximum number of new replicas to create (in percents from current state) | integer | 50             | only if `global.autoscaling` is true and `global.tier` is PROD |
-| **hpa.behaviorScaleUpPoliciesPeriodSeconds**        | pause for every new scale up decision                                         | integer | 15             | only if `global.autoscaling` is true and `global.tier` is PROD |
+| **hpa.minReplicas**  | minimum number of replicas | integer | 6 | only if `global.autoscaling` is true and `global.tier` is PROD |
+| **hpa.maxReplicas**  | maximum number of replicas | integer | 15 | only if `global.autoscaling` is true and `global.tier` is PROD |
+| **hpa.pubsubTargetType** | target type of the Pub/Sub external metrics; must be AverageValue (the template always emits `averageValue`) | string | "AverageValue" | only if `global.autoscaling` is true and `global.tier` is PROD |
+| **hpa.pubsubTargetValue**   | threshold (`averageValue`) of each Pub/Sub subscription metric that triggers scaling up | integer | 50 | only if `global.autoscaling` is true and `global.tier` is PROD |
+| **hpa.cpuTargetType**   | target type of the CPU resource metric; must be Utilization (the template always emits `averageUtilization`) | string | "Utilization" | only if `global.autoscaling` is true and `global.tier` is PROD |
+| **hpa.cpuTargetValue**  | CPU `averageUtilization` threshold that triggers scaling up | integer | 70 | only if `global.autoscaling` is true and `global.tier` is PROD |
+| **hpa.memoryTargetType** | target type of the memory resource metric; must be Utilization (the template always emits `averageUtilization`) | string | "Utilization" | only if `global.autoscaling` is true and `global.tier` is PROD |
+| **hpa.memoryTargetValue** | memory `averageUtilization` threshold that triggers scaling up | integer | 70 | only if `global.autoscaling` is true and `global.tier` is PROD |
+| **hpa.behaviorScaleUpStabilizationWindowSeconds**   | time to start implementing the scale up when it is triggered                  | integer | 60             | only if `global.autoscaling` is true and `global.tier` is PROD |
+| **hpa.behaviorScaleUpPoliciesValue**                | the maximum number of new replicas to create (absolute number of pods)        | integer | 1             | only if `global.autoscaling` is true and `global.tier` is PROD |
+| **hpa.behaviorScaleUpPoliciesPeriodSeconds**        | pause for every new scale up decision                                         | integer | 120             | only if `global.autoscaling` is true and `global.tier` is PROD |
 | **hpa.behaviorScaleDownStabilizationWindowSeconds** | time to start implementing the scale down when it is triggered                | integer | 60             | only if `global.autoscaling` is true and `global.tier` is PROD |
-| **hpa.behaviorScaleDownPoliciesValue**              | the maximum number of replicas to destroy (in percents from current state)    | integer | 25             | only if `global.autoscaling` is true and `global.tier` is PROD |
-| **hpa.behaviorScaleDownPoliciesPeriodSeconds**      | pause for every new scale down decision                                       | integer | 60             | only if `global.autoscaling` is true and `global.tier` is PROD |
+| **hpa.behaviorScaleDownPoliciesValue**              | the maximum number of replicas to destroy (absolute number of pods)           | integer | 1             | only if `global.autoscaling` is true and `global.tier` is PROD |
+| **hpa.behaviorScaleDownPoliciesPeriodSeconds**      | pause for every new scale down decision                                       | integer | 120             | only if `global.autoscaling` is true and `global.tier` is PROD |
 
 ### Limits variables
 
-- 
GitLab