Commit 00c659f2 authored by Vibhuti Sharma [Microsoft]'s avatar Vibhuti Sharma [Microsoft]
Browse files

dynamic configuration added

parent 4954abea
......@@ -29,10 +29,14 @@ data "azurerm_client_config" "current" {}
# Local Variables
#-------------------------------
locals {
# Lookup table keyed by action group name; each value is the id of the
# corresponding azurerm_monitor_action_group resource.
# NOTE(review): the alert rule in this file also builds action group ids with
# format() from action-group-id-suffix; confirm whether this map is still used.
action-group-ids = {
DevActionGroup = azurerm_monitor_action_group.DevActionGroup.id
}
# Resource-id path for action groups in the current subscription and resource
# group, ending in ".../actiongroups/".
# NOTE(review): despite the "suffix" in its name, this value is passed as the
# FIRST argument of format("%s%s", ...) with the action group name appended,
# i.e. it acts as a prefix.
action-group-id-suffix = "/subscriptions/${data.azurerm_client_config.current.subscription_id}/resourceGroups/${data.azurerm_resource_group.rg.name}/providers/microsoft.insights/actiongroups/"
//x = var.example.storage.Exceptions.present
}
//output "example" {
// value = local.x
//}
#-------------------------------
# Resource Group
#-------------------------------
......@@ -41,59 +45,69 @@ data "azurerm_resource_group" "rg" {
}
#-------------------------------
# Action Group
# Action Groups
#-------------------------------
resource "azurerm_monitor_action_group" "DevActionGroup" {
name = var.action-groups["DevActionGroup"].name
resource "azurerm_monitor_action_group" "action-groups" {
count = length(var.action-groups)
name = var.action-groups[count.index].name
resource_group_name = data.azurerm_resource_group.rg.name
short_name = "vibsharm-ag"
short_name = var.action-groups[count.index].short-name
# There can be multiple email receivers
email_receiver {
name = var.action-groups["DevActionGroup"].email-receiver.name
email_address = var.action-groups["DevActionGroup"].email-receiver.email-address
use_common_alert_schema = false
# There can be 0 or more email receivers
dynamic "email_receiver" {
for_each = var.action-groups[count.index].email-receiver
content {
name = email_receiver.value.name
email_address = email_receiver.value.email-address
use_common_alert_schema = false
}
}
# There can be 0 or more sms receivers
dynamic "sms_receiver" {
for_each = var.action-groups[count.index].sms-receiver
content {
name = sms_receiver.value.name
country_code = sms_receiver.value.country-code
phone_number = sms_receiver.value.phone
}
}
}
#-------------------------------
# CPU Usage Alert Rule
# Custom Log Search Type Alerts
#-------------------------------
resource "azurerm_monitor_scheduled_query_rules_alert" "CPU-Usage-Alert" {
name = var.cpu-soft-limit-alert.alert-rule-name
resource "azurerm_monitor_scheduled_query_rules_alert" "alerts" {
count = length(var.log-alerts)
name = var.log-alerts[count.index].alert-rule-name
location = data.azurerm_resource_group.rg.location
resource_group_name = data.azurerm_resource_group.rg.name
action {
action_group = [for name in var.cpu-soft-limit-alert.service-specific-data.storage.action-group-name :
lookup(local.action-group-ids,name)
action_group = [for name in var.log-alerts[count.index].action-group-name :
format("%s%s", local.action-group-id-suffix, name)
]
}
/*dynamic "action" {
for_each = [for name in var.cpu-soft-limit-alert.service-specific-data.storage.action-group-name : {
resource-id = lookup(local.action-group-ids,name)
}]
content {
action_group = action.value.resource-id
}
}*/
data_source_id = "/subscriptions/${data.azurerm_client_config.current.subscription_id}/resourceGroups/osdu-mvp-crpentest-qw18-rg/providers/Microsoft.Insights/components/osdu-mvp-crpentest-qw18-ai"
description = var.cpu-soft-limit-alert.description
enabled = true
query = "performanceCounters\n| where cloud_RoleName == \"${var.cpu-soft-limit-alert.service-specific-data.storage.service-name}\"\n| where category == \"Processor\" and name == \"% Processor Time\"\n| summarize AggregatedValue = avg(value) by bin(timestamp, 5min)"
severity = var.cpu-soft-limit-alert.service-specific-data.storage.severity
frequency = var.cpu-soft-limit-alert.service-specific-data.storage.frequency
time_window = var.cpu-soft-limit-alert.service-specific-data.storage.time-window
data_source_id = "/subscriptions/${data.azurerm_client_config.current.subscription_id}/resourceGroups/${var.central-group-prefix}-rg/providers/Microsoft.Insights/components/${var.central-group-prefix}-ai"
description = var.log-alerts[count.index].description
enabled = var.log-alerts[count.index].enabled
query = var.log-alerts[count.index].query
severity = var.log-alerts[count.index].severity
frequency = var.log-alerts[count.index].frequency
time_window = var.log-alerts[count.index].time-window
trigger {
operator = "GreaterThan"
threshold = var.cpu-soft-limit-alert.service-specific-data.storage.threshold
metric_trigger {
operator = "GreaterThan"
threshold = 0
metric_trigger_type = "Total"
metric_column = "timestamp"
operator = var.log-alerts[count.index].trigger-operator
threshold = var.log-alerts[count.index].trigger-threshold
dynamic "metric_trigger" {
# create this block only if alert is of `metric` type
for_each = var.log-alerts[count.index].metric-type ? [1] : []
content {
operator = var.log-alerts[count.index].metric-trigger-operator
threshold = var.log-alerts[count.index].metric-trigger-threshold
metric_trigger_type = var.log-alerts[count.index].metric-trigger-type
metric_column = var.log-alerts[count.index].metric-trigger-column
}
}
}
}
}
\ No newline at end of file
/*action-groups = {
DevActionGroup = {
name = "Dev",
email-receiver = {
email-address = "vibsharm@gmail.com",
name = "Dev Email"
}
},
ProdActionGroup = {
name = "Prod",
email-receiver = {
email-address = "vibsharm@gmail.com",
name = "Prod Email"
}
}
}*/
central-group-prefix = "xxxx"
/* This WORKS!!
action-groups = {
name = "Dev",
email-receiver = {
email-address = "vibsharm@gmail.com",
name = "Dev Email"
action-groups = [
{
name = "DevActionGroup",
short-name = "dev-ag",
email-receiver = [
{
name = "primary developer",
email-address = "xxx@microsoft.com",
common-alert-schema = false
},
{
name = "secondary developer",
email-address = "xxx@gmail.com",
common-alert-schema = false
}
],
sms-receiver = []
},
{
name = "ProdActionGroup",
short-name = "prod-ag",
email-receiver = [],
sms-receiver = [
{
name = "local support",
country-code = "91",
phone = "xxxxxxxxxx"
}
]
}
}*/
]
action-groups = {
DevActionGroup = {
name = "vibsharm-ag",
email-receiver = {
email-address = "vibsharm@microsoft.com",
name = "Dev Email"
}
log-alerts = [
#------------Storage Service Alerts----------------#
{
service-name = "storage",
alert-rule-name = "CPU Soft limit",
description = "CPU Soft limit alert rule for storage service",
# Alert based on metric measurement
metric-type = true
enabled = "false",
severity = 4,
frequency = 15,
time-window = 15,
action-group-name = ["ProdActionGroup", "DevActionGroup"],
query = "performanceCounters\n| where cloud_RoleName == \"storage\"\n| where category == \"Processor\" and name == \"% Processor Time\"\n| summarize AggregatedValue = avg(value) by bin(timestamp, 15min)",
# Threshold value for CPU usage which when exceeded will raise alert
trigger-threshold = 60,
trigger-operator = "GreaterThan",
metric-trigger-operator = "GreaterThan",
# Number of threshold breaches tolerated before the alert fires (compared using metric-trigger-operator)
metric-trigger-threshold = 0,
metric-trigger-type = "Total",
metric-trigger-column = "cloud_RoleName"
},
ProdActionGroup = {
name = "Prod",
email-receiver = {
email-address = "vibsharm@gmail.com",
name = "Prod Email"
}
{
service-name = "storage",
alert-rule-name = "Put Record Duration",
description = "Alert for duration of storage service PUT record API call",
# Alert based on Number of results hence metric-type is false
metric-type = false
enabled = "false",
severity = 3,
frequency = 5,
time-window = 5,
action-group-name = ["DevActionGroup"],
# Since it is alert based on number of results, the query must end with "| count"
query = "requests\n| where cloud_RoleName == \"storage\"\n| where name == \"GET /api/storage/v2/swagger-ui.html\"\n| where duration > 0\n| count",
# Number of results > 0 (i.e. at least one result) => alert is triggered.
trigger-threshold = 0,
trigger-operator = "GreaterThan",
#------ Supply dummy values for below variables since it is not a metric based alert -----#
metric-trigger-operator = null,
metric-trigger-threshold = null,
metric-trigger-type = null,
metric-trigger-column = null
}
}
# Expectation -
/*DevActionGroup = {
name = "vibsharm-ag",
receiver = {
receiver-1 = {
type = email
email-address = "vibsharm@microsoft.com",
name = "Dev Email"
}
receiver-2 = {
type = SMS
number = "435",
name = "Dev Email"
}
}
}*/
#------------------Legal Service Alerts--------------------#
]
//example = {
// storage = {
// CPU = {
// name = "Hello"
// },
// Memory = {
// threshold = 9
// },
// Exceptions = {
// present = true
// }
// },
// legal = {
// Memory = {
// threshold = 5
// }
// }
//}
variable "storage-service-alerts" {
type = object({
cpu-soft-limit = object({
alert-rule-name = string,
description = string,
enabled = string,
severity = number,
threshold = number,
frequency = number,
time-window = number,
action-group-name = list(string),
query = string
}),
cpu-hard-limit = object({
alert-rule-name = string,
description = string,
enabled = string,
severity = number,
threshold = number,
frequency = number,
time-window = number,
action-group-name = list(string),
query = string
})
})
default = {
cpu-soft-limit = {
alert-rule-name = "CPU Soft limit",
description = "CPU Soft limit alert rule",
enabled = "true",
severity = 4,
threshold = 60,
frequency = 15,
time-window = 15,
action-group-name = ["DevActionGroup"],
query = "whole query"
},
cpu-hard-limit = {
alert-rule-name = "CPU Hard limit",
description = "CPU Hard limit alert rule",
enabled = "false",
severity = 3,
threshold = 70,
frequency = 5,
time-window = 15,
action-group-name = ["ProdActionGroup","DevActionGroup"],
query = "whole query"
}
}
variable "central-group-prefix" {
  # Interpolated into the alert rule's data_source_id as
  # "<prefix>-rg" (resource group) and "<prefix>-ai" (Insights component).
  description = "prefix for the central resource group"
  type        = string
}
/* THIS WORKS!!!
variable "action-groups" {
type = object({
name = string,
email-receiver = map(string)
})
}*/
variable "action-groups" {
type = map(object({
name = string,
email-receiver = map(string)
type = list(object({
name = string,
short-name = string,
email-receiver = list(object({
name = string,
email-address = string,
common-alert-schema = bool
})),
sms-receiver = list(object({
name = string,
country-code = string,
phone = string
}))
}))
}
variable "log-alerts" {
  description = "Log-search alert rules. One azurerm_monitor_scheduled_query_rules_alert is created per list element."

  type = list(object({
    # Label of the service the alert belongs to.
    # NOTE(review): not referenced by the alert resource visible in this file.
    service-name = string,
    # Used as the `name` of the alert rule.
    alert-rule-name = string,
    description     = string,
    # Alert can be based on metric measurement (true) or on number of results (false).
    # When true, a `metric_trigger` block is generated from the metric-trigger-* keys.
    metric-type = bool,
    # Feeds the rule's `enabled` argument. Typed as bool; the strings "true" and
    # "false" are still accepted because Terraform converts them automatically.
    enabled     = bool,
    severity    = number,
    frequency   = number,
    time-window = number,
    # Action group NAMES; each one is appended to local.action-group-id-suffix
    # to form the full action group resource id.
    action-group-name = list(string),
    query             = string,
    trigger-threshold = number,
    trigger-operator  = string,
    # The keys below are only read when metric-type is true. Set them to null for
    # alerts based on number of results: null is a valid value for every type, so
    # concrete types can be used here instead of `any` without rejecting null.
    metric-trigger-operator  = string,
    metric-trigger-threshold = number,
    metric-trigger-type      = string,
    metric-trigger-column    = string
  }))
}
variable "cpu-soft-limit-alert" {
  # Settings for the CPU soft-limit alert: rule-wide fields plus a map of
  # per-service overrides keyed by service (e.g. "storage", "legal").
  type = object({
    alert-rule-name = string
    description     = string
    location        = string

    # One entry per service.
    # NOTE(review): `enabled` and `severity` are declared as string while the
    # defaults below are written as bool/number literals; Terraform converts
    # them to strings to satisfy this constraint.
    service-specific-data = map(object({
      service-name      = string
      enabled           = string
      severity          = string
      threshold         = number
      frequency         = number
      time-window       = number
      action-group-name = list(string)
    }))
  })

  default = {
    alert-rule-name = "CPU Soft Limit Alert"
    description     = "Alerting when CPU Usage crosses soft limit threshold value"
    location        = "Central US"

    service-specific-data = {
      storage = {
        service-name      = "storage"
        enabled           = true
        severity          = 3
        threshold         = 0
        frequency         = 5
        time-window       = 5
        action-group-name = ["DevActionGroup"]
      }

      legal = {
        service-name      = "legal"
        enabled           = true
        severity          = 4
        threshold         = 70
        frequency         = 15
        time-window       = 15
        action-group-name = ["DevActionGroup"]
      }
    }
  }
}
\ No newline at end of file
/*variable "example" {
type = object({
storage = object({
CPU = object({
name = string
}),
Memory = object({
threshold = number
}),
Exceptions = object({
present = bool
})
}),
legal = object({
Memory = object({
threshold = number
})
})
})
}*/
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment