From b6717db03813a55967d19267b06ee2c2593cc6d2 Mon Sep 17 00:00:00 2001 From: "Yauheni Rykhter (EPAM)" <yauheni_rykhter@epam.com> Date: Tue, 18 Feb 2025 18:20:29 +0000 Subject: [PATCH] Gonrg 10592 indexer autoscaling --- devops/gc/deploy/README.md | 22 +++++----- devops/gc/deploy/templates/hpa.yaml | 62 +++++++++++++++++++++++------ devops/gc/deploy/values.yaml | 18 +++++---- 3 files changed, 74 insertions(+), 28 deletions(-) diff --git a/devops/gc/deploy/README.md b/devops/gc/deploy/README.md index a49f0b569..711c492e3 100644 --- a/devops/gc/deploy/README.md +++ b/devops/gc/deploy/README.md @@ -84,16 +84,20 @@ First you need to set variables in **values.yaml** file using any code editor. S | Name | Description | Type | Default | Required | |-----------------------------------------------------|-------------------------------------------------------------------------------|---------|----------------|----------------------------------------------------------------| -| **hpa.minReplicas** | minimum number of replicas | integer | 6 | only if `global.autoscaling` is true and `global.tier` is PROD | -| **hpa.maxReplicas** | maximum number of replicas | integer | 15 | only if `global.autoscaling` is true and `global.tier` is PROD | -| **hpa.targetType** | type of measurements: AverageValue or Value | string | "AverageValue" | only if `global.autoscaling` is true and `global.tier` is PROD | -| **hpa.targetValue** | threshold value to trigger the scaling up | integer | 80 | only if `global.autoscaling` is true and `global.tier` is PROD | -| **hpa.behaviorScaleUpStabilizationWindowSeconds** | time to start implementing the scale up when it is triggered | integer | 10 | only if `global.autoscaling` is true and `global.tier` is PROD | -| **hpa.behaviorScaleUpPoliciesValue** | the maximum number of new replicas to create (in percents from current state) | integer | 50 | only if `global.autoscaling` is true and `global.tier` is PROD | -| 
**hpa.behaviorScaleUpPoliciesPeriodSeconds** | pause for every new scale up decision | integer | 15 | only if `global.autoscaling` is true and `global.tier` is PROD | +| **hpa.minReplicas** | minimum number of replicas | integer | 6 | only if `global.autoscaling` is true and `global.tier` is PROD | +| **hpa.maxReplicas** | maximum number of replicas | integer | 15 | only if `global.autoscaling` is true and `global.tier` is PROD | +| **hpa.pubsubTargetType** | type of measurements: AverageValue | string | "AverageValue" | only if `global.autoscaling` is true and `global.tier` is PROD | +| **hpa.pubsubTargetValue** | threshold value to trigger the scaling up | integer | 50 | only if `global.autoscaling` is true and `global.tier` is PROD | +| **hpa.cpuTargetType** | type of measurements: Utilization | string | "Utilization" | only if `global.autoscaling` is true and `global.tier` is PROD | +| **hpa.cpuTargetValue** | threshold value to trigger the scaling up (average CPU utilization, in percent of requested CPU) | integer | 70 | only if `global.autoscaling` is true and `global.tier` is PROD | +| **hpa.memoryTargetType** | type of measurements: Utilization | string | "Utilization" | only if `global.autoscaling` is true and `global.tier` is PROD | +| **hpa.memoryTargetValue** | threshold value to trigger the scaling up (average memory utilization, in percent of requested memory) | integer | 70 | only if `global.autoscaling` is true and `global.tier` is PROD | +| **hpa.behaviorScaleUpStabilizationWindowSeconds** | time to start implementing the scale up when it is triggered | integer | 60 | only if `global.autoscaling` is true and `global.tier` is PROD | +| **hpa.behaviorScaleUpPoliciesValue** | the maximum number of new replicas to create per period (number of pods) | integer | 1 | only if `global.autoscaling` is true and `global.tier` is PROD | +| **hpa.behaviorScaleUpPoliciesPeriodSeconds** | pause for every new scale up decision | integer | 120 | only if `global.autoscaling` is true and `global.tier` is PROD | | **hpa.behaviorScaleDownStabilizationWindowSeconds** | time to
start implementing the scale down when it is triggered | integer | 60 | only if `global.autoscaling` is true and `global.tier` is PROD | -| **hpa.behaviorScaleDownPoliciesValue** | the maximum number of replicas to destroy (in percents from current state) | integer | 25 | only if `global.autoscaling` is true and `global.tier` is PROD | -| **hpa.behaviorScaleDownPoliciesPeriodSeconds** | pause for every new scale down decision | integer | 60 | only if `global.autoscaling` is true and `global.tier` is PROD | +| **hpa.behaviorScaleDownPoliciesValue** | the maximum number of replicas to destroy per period (number of pods) | integer | 1 | only if `global.autoscaling` is true and `global.tier` is PROD | +| **hpa.behaviorScaleDownPoliciesPeriodSeconds** | pause for every new scale down decision | integer | 120 | only if `global.autoscaling` is true and `global.tier` is PROD | ### Limits variables diff --git a/devops/gc/deploy/templates/hpa.yaml b/devops/gc/deploy/templates/hpa.yaml index 3cda953f2..0dca0a867 100644 --- a/devops/gc/deploy/templates/hpa.yaml +++ b/devops/gc/deploy/templates/hpa.yaml @@ -12,31 +12,69 @@ spec: minReplicas: {{ .Values.hpa.minReplicas }} maxReplicas: {{ .Values.hpa.maxReplicas }} metrics: - - type: External - external: + - external: metric: - name: istio.io|service|server|request_count + name: pubsub.googleapis.com|subscription|num_undelivered_messages selector: matchLabels: - metric.labels.destination_workload_name: {{ .Values.conf.appName | quote }} + resource.labels.subscription_id: indexer-records-changed target: - type: {{ .Values.hpa.targetType | quote }} - {{- if eq .Values.hpa.targetType "AverageValue" }} - averageValue: {{ .Values.hpa.targetValue }} - {{- else if eq .Values.hpa.targetType "Value" }} - value: {{ .Values.hpa.targetValue }} - {{- end }} + type: {{ .Values.hpa.pubsubTargetType | quote }} + averageValue: {{ .Values.hpa.pubsubTargetValue }} + type: External + - external: + metric: + name:
pubsub.googleapis.com|subscription|num_undelivered_messages + selector: + matchLabels: + resource.labels.subscription_id: indexer-reindex + target: + type: {{ .Values.hpa.pubsubTargetType | quote }} + averageValue: {{ .Values.hpa.pubsubTargetValue }} + type: External + - external: + metric: + name: pubsub.googleapis.com|subscription|num_undelivered_messages + selector: + matchLabels: + resource.labels.subscription_id: indexer-reprocess + target: + type: {{ .Values.hpa.pubsubTargetType | quote }} + averageValue: {{ .Values.hpa.pubsubTargetValue }} + type: External + - external: + metric: + name: pubsub.googleapis.com|subscription|num_messages_received + selector: + matchLabels: + resource.labels.subscription_id: indexer-schema-changed + target: + type: {{ .Values.hpa.pubsubTargetType | quote }} + averageValue: {{ .Values.hpa.pubsubTargetValue }} + type: External + - resource: + name: cpu + target: + type: {{ .Values.hpa.cpuTargetType | quote }} + averageUtilization: {{ .Values.hpa.cpuTargetValue }} + type: Resource + - resource: + name: memory + target: + type: {{ .Values.hpa.memoryTargetType | quote }} + averageUtilization: {{ .Values.hpa.memoryTargetValue }} + type: Resource behavior: scaleUp: stabilizationWindowSeconds: {{ .Values.hpa.behaviorScaleUpStabilizationWindowSeconds }} policies: - - type: Percent + - type: Pods value: {{ .Values.hpa.behaviorScaleUpPoliciesValue }} periodSeconds: {{ .Values.hpa.behaviorScaleUpPoliciesPeriodSeconds }} scaleDown: stabilizationWindowSeconds: {{ .Values.hpa.behaviorScaleDownStabilizationWindowSeconds }} policies: - - type: Percent + - type: Pods value: {{ .Values.hpa.behaviorScaleDownPoliciesValue }} periodSeconds: {{ .Values.hpa.behaviorScaleDownPoliciesPeriodSeconds }} {{- end }} diff --git a/devops/gc/deploy/values.yaml b/devops/gc/deploy/values.yaml index a0bd52703..e0865d492 100644 --- a/devops/gc/deploy/values.yaml +++ b/devops/gc/deploy/values.yaml @@ -47,14 +47,18 @@ istio: hpa: minReplicas: 6 maxReplicas: 15 - 
targetType: "AverageValue" - targetValue: 340 #200rps*0.85.*2 - behaviorScaleUpStabilizationWindowSeconds: 10 - behaviorScaleUpPoliciesValue: 50 - behaviorScaleUpPoliciesPeriodSeconds: 15 + pubsubTargetType: AverageValue + pubsubTargetValue: 50 + cpuTargetType: Utilization + cpuTargetValue: 70 + memoryTargetType: Utilization + memoryTargetValue: 70 + behaviorScaleUpStabilizationWindowSeconds: 60 + behaviorScaleUpPoliciesValue: 1 + behaviorScaleUpPoliciesPeriodSeconds: 120 behaviorScaleDownStabilizationWindowSeconds: 60 - behaviorScaleDownPoliciesValue: 25 - behaviorScaleDownPoliciesPeriodSeconds: 60 + behaviorScaleDownPoliciesValue: 1 + behaviorScaleDownPoliciesPeriodSeconds: 120 limits: maxTokens: 200 #rps -- GitLab