From 5209e09e691f51dc719b774a07e297e5ea8df198 Mon Sep 17 00:00:00 2001
From: "Ihor Anikeiev [EPAM / GCP]" <ihor_anikeiev@epam.com>
Date: Wed, 8 Jan 2025 14:50:30 +0000
Subject: [PATCH] Updated HPA parameters

---
 devops/gc/deploy/README.md   | 40 +++++++++++++++++++++---------------
 devops/gc/deploy/values.yaml |  8 ++++----
 2 files changed, 28 insertions(+), 20 deletions(-)

diff --git a/devops/gc/deploy/README.md b/devops/gc/deploy/README.md
index 96e921c15..fb2ae1d1b 100644
--- a/devops/gc/deploy/README.md
+++ b/devops/gc/deploy/README.md
@@ -83,26 +83,34 @@ First you need to set variables in **values.yaml** file using any code editor. S
 
 ### Horizontal Pod Autoscaling (HPA) variables (works only if tier=PROD and autoscaling=true)
 
-| Name                                                | Description                                                                   | Type    | Default        | Required                                                       |
-|-----------------------------------------------------|-------------------------------------------------------------------------------|---------|----------------|----------------------------------------------------------------|
-| **hpa.minReplicas**                                 | minimum number of replicas                                                    | integer | `10`           | only if `global.autoscaling` is true and `global.tier` is PROD |
-| **hpa.maxReplicas**                                 | maximum number of replicas                                                    | integer | `20`           | only if `global.autoscaling` is true and `global.tier` is PROD |
-| **hpa.targetType**                                  | type of measurements: AverageValue or Value                                   | string  | `AverageValue` | only if `global.autoscaling` is true and `global.tier` is PROD |
-| **hpa.targetValue**                                 | threshold value to trigger the scaling up                                     | integer | `40`           | only if `global.autoscaling` is true and `global.tier` is PROD |
-| **hpa.behaviorScaleUpStabilizationWindowSeconds**   | time to start implementing the scale up when it is triggered                  | integer | `10`           | only if `global.autoscaling` is true and `global.tier` is PROD |
-| **hpa.behaviorScaleUpPoliciesValue**                | the maximum number of new replicas to create (in percents from current state) | integer | `50`           | only if `global.autoscaling` is true and `global.tier` is PROD |
-| **hpa.behaviorScaleUpPoliciesPeriodSeconds**        | pause for every new scale up decision                                         | integer | `15`           | only if `global.autoscaling` is true and `global.tier` is PROD |
-| **hpa.behaviorScaleDownStabilizationWindowSeconds** | time to start implementing the scale down when it is triggered                | integer | `60`           | only if `global.autoscaling` is true and `global.tier` is PROD |
-| **hpa.behaviorScaleDownPoliciesValue**              | the maximum number of replicas to destroy (in percents from current state)    | integer | `25`           | only if `global.autoscaling` is true and `global.tier` is PROD |
-| **hpa.behaviorScaleDownPoliciesPeriodSeconds**      | pause for every new scale down decision                                       | integer | `60`           | only if `global.autoscaling` is true and `global.tier` is PROD |
+| Name                                                | Description                                                                   | Type    | Default          | Required                                                       |
+|-----------------------------------------------------|-------------------------------------------------------------------------------|---------|------------------|----------------------------------------------------------------|
+| **hpa.minReplicas**                                 | minimum number of replicas                                                    | integer | `6`              | only if `global.autoscaling` is true and `global.tier` is PROD |
+| **hpa.maxReplicas**                                 | maximum number of replicas                                                    | integer | `20`             | only if `global.autoscaling` is true and `global.tier` is PROD |
+| **hpa.targetType**                                  | type of measurements: AverageValue or Value                                   | string  | `"AverageValue"` | only if `global.autoscaling` is true and `global.tier` is PROD |
+| **hpa.targetValue**                                 | threshold value to trigger the scaling up                                     | integer | `45`             | only if `global.autoscaling` is true and `global.tier` is PROD |
+| **hpa.behaviorScaleUpStabilizationWindowSeconds**   | time to start implementing the scale up when it is triggered                  | integer | `10`             | only if `global.autoscaling` is true and `global.tier` is PROD |
+| **hpa.behaviorScaleUpPoliciesValue**                | the maximum number of new replicas to create (in percents from current state) | integer | `50`             | only if `global.autoscaling` is true and `global.tier` is PROD |
+| **hpa.behaviorScaleUpPoliciesPeriodSeconds**        | pause for every new scale up decision                                         | integer | `15`             | only if `global.autoscaling` is true and `global.tier` is PROD |
+| **hpa.behaviorScaleDownStabilizationWindowSeconds** | time to start implementing the scale down when it is triggered                | integer | `60`             | only if `global.autoscaling` is true and `global.tier` is PROD |
+| **hpa.behaviorScaleDownPoliciesValue**              | the maximum number of replicas to destroy (in percents from current state)    | integer | `25`             | only if `global.autoscaling` is true and `global.tier` is PROD |
+| **hpa.behaviorScaleDownPoliciesPeriodSeconds**      | pause for every new scale down decision                                       | integer | `60`             | only if `global.autoscaling` is true and `global.tier` is PROD |
 
 ### Limits variables
 
 | Name                     | Description                                     | Type    | Default | Required                                                       |
-|--------------------------|-------------------------------------------------|---------|---------|----------------------------------------------------------------|
-| **limits.maxTokens**     | maximum number of requests per fillInterval     | integer | `25`    | only if `global.autoscaling` is true and `global.tier` is PROD |
-| **limits.tokensPerFill** | number of new tokens allowed every fillInterval | integer | `25`    | only if `global.autoscaling` is true and `global.tier` is PROD |
-| **limits.fillInterval**  | time interval                                   | string  | `1s`    | only if `global.autoscaling` is true and `global.tier` is PROD |
+|--------------------------|-------------------------------------------------|---------|---------|----------------------------------------------------------------|
+| **limits.maxTokens**     | maximum number of requests per fillInterval     | integer | `30`    | only if `global.autoscaling` is true and `global.tier` is PROD |
+| **limits.tokensPerFill** | number of new tokens allowed every fillInterval | integer | `30`    | only if `global.autoscaling` is true and `global.tier` is PROD |
+| **limits.fillInterval**  | time interval                                   | string  | `"1s"`  | only if `global.autoscaling` is true and `global.tier` is PROD |
+
+### Methodology for calculating **hpa.targetValue**, **limits.maxTokens** and **limits.tokensPerFill**
+
+The values of **hpa.targetValue**, **limits.maxTokens** and **limits.tokensPerFill** were determined empirically through load testing. The tests were run on the N2D machine series, which uses either AMD EPYC Milan or AMD EPYC Rome processors. The values were fine-tuned to ensure optimal performance under typical workloads.
+
+### Recommendations for New Instance Types
+
+When changing the instance type to a newer generation, such as the C3D series, it is essential to conduct new load testing. This ensures the parameters are recalibrated to match the performance characteristics of the new processor architecture, optimizing resource utilization and maintaining application stability.
 
 ## Install the Helm chart
 
diff --git a/devops/gc/deploy/values.yaml b/devops/gc/deploy/values.yaml
index 953249158..5cb0ca592 100644
--- a/devops/gc/deploy/values.yaml
+++ b/devops/gc/deploy/values.yaml
@@ -44,10 +44,10 @@ istio:
   proxyMemoryLimit: "256Mi"
 
 hpa:
-  minReplicas: 10
+  minReplicas: 6
   maxReplicas: 20
   targetType: "AverageValue"
-  targetValue: 40   # rps*0.85*2
+  targetValue: 45
   behaviorScaleUpStabilizationWindowSeconds: 10
   behaviorScaleUpPoliciesValue: 50
   behaviorScaleUpPoliciesPeriodSeconds: 15
@@ -56,6 +56,6 @@ hpa:
   behaviorScaleDownPoliciesPeriodSeconds: 60
 
 local_ratelimit:
-  max_tokens: 25   # rps
-  tokens_per_fill: 25
+  max_tokens: 30
+  tokens_per_fill: 30
   fill_interval: "1s"
-- 
GitLab