Support setting the alert rule region name

barnettZQG 2020-07-18 16:56:57 +08:00
parent 475d625347
commit d9da07274b


@@ -37,6 +37,21 @@ type AlertingRulesManager struct {
//NewRulesManager new rule manager
func NewRulesManager(config *option.Config) *AlertingRulesManager {
+ region := os.Getenv("REGION_NAME")
+ if region == "" {
+ region = "default"
+ }
+ commonLables := map[string]string{
+ "Alert": "Rainbond",
+ "Region": region,
+ }
+ getseverityLables := func(severity string) map[string]string {
+ return map[string]string{
+ "Alert": "Rainbond",
+ "severity": severity,
+ "Region": region,
+ }
+ }
a := &AlertingRulesManager{
RulesConfig: &AlertingRulesConfig{
Groups: []*AlertingNameConfig{
@@ -44,57 +59,47 @@ func NewRulesManager(config *option.Config) *AlertingRulesManager {
Name: "GatewayHealth",
Rules: []*RulesConfig{
&RulesConfig{
Alert: "GatewayDown",
Expr: "absent(up{job=\"gateway\"})",
For: "10s",
Labels: map[string]string{
"Alert": "Rainbond",
},
Alert: "GatewayDown",
Expr: "absent(up{job=\"gateway\"})",
For: "10s",
Labels: commonLables,
Annotations: map[string]string{
"description": "gateway node {{ $labels.instance }} is down, ",
"summary": "gateway is down",
},
},
&RulesConfig{
Alert: "RequestSizeTooMuch",
Expr: "sum by (instance, host) (rate(gateway_request_size_sum[5m])) > 1024*1024*10",
For: "20s",
Labels: map[string]string{
"Alert": "Rainbond",
},
Alert: "RequestSizeTooMuch",
Expr: "sum by (instance, host) (rate(gateway_request_size_sum[5m])) > 1024*1024*10",
For: "20s",
Labels: commonLables,
Annotations: map[string]string{
"description": "http doamin {{ $labels.host }} per-second request size {{ humanize $value }}, more than 10M",
"summary": "Too much traffic",
},
},
&RulesConfig{
Alert: "ResponseSizeTooMuch",
Expr: "sum by (instance, host) (rate(gateway_response_size_sum[5m])) > 1024*1024*10",
For: "20s",
Labels: map[string]string{
"Alert": "Rainbond",
},
Alert: "ResponseSizeTooMuch",
Expr: "sum by (instance, host) (rate(gateway_response_size_sum[5m])) > 1024*1024*10",
For: "20s",
Labels: commonLables,
Annotations: map[string]string{
"description": "http doamin {{ $labels.host }} per-second response size {{ humanize $value }}, more than 10M",
"summary": "Too much traffic",
},
},
&RulesConfig{
Alert: "RequestMany",
Expr: "rate(gateway_requests[5m]) > 200",
For: "10s",
Labels: map[string]string{
"Alert": "Rainbond",
},
Alert: "RequestMany",
Expr: "rate(gateway_requests[5m]) > 200",
For: "10s",
Labels: commonLables,
Annotations: map[string]string{"description": "http doamin {{ $labels.host }} per-second requests {{ humanize $value }}, more than 200"},
},
&RulesConfig{
Alert: "FailureRequestMany",
Expr: "rate(gateway_requests{status=~\"5..\"}[5m]) > 5",
For: "10s",
Labels: map[string]string{
"Alert": "Rainbond",
},
Alert: "FailureRequestMany",
Expr: "rate(gateway_requests{status=~\"5..\"}[5m]) > 5",
For: "10s",
Labels: commonLables,
Annotations: map[string]string{"description": "http doamin {{ $labels.host }} per-second failure requests {{ humanize $value }}, more than 5"},
},
},
@@ -103,33 +108,27 @@ func NewRulesManager(config *option.Config) *AlertingRulesManager {
Name: "BuilderHealth",
Rules: []*RulesConfig{
&RulesConfig{
Alert: "BuilderDown",
Expr: "absent(up{component=\"builder\"})",
For: "10s",
Labels: map[string]string{
"Alert": "Rainbond",
},
Alert: "BuilderDown",
Expr: "absent(up{component=\"builder\"})",
For: "10s",
Labels: commonLables,
Annotations: map[string]string{
"description": "builder(rbd-chaos) node {{ $labels.instance }} is down, ",
"summary": "builder(rbd-chaos) is down",
},
},
&RulesConfig{
Alert: "BuilderUnhealthy",
Expr: "builder_exporter_health_status == 0",
For: "3m",
Labels: map[string]string{
"Alert": "Rainbond",
},
Alert: "BuilderUnhealthy",
Expr: "builder_exporter_health_status == 0",
For: "3m",
Labels: commonLables,
Annotations: map[string]string{"description": "builder unhealthy"},
},
&RulesConfig{
Alert: "BuilderTaskError",
Expr: "builder_exporter_builder_current_concurrent_task == builder_exporter_builder_max_concurrent_task",
For: "20s",
Labels: map[string]string{
"Alert": "Rainbond",
},
Alert: "BuilderTaskError",
Expr: "builder_exporter_builder_current_concurrent_task == builder_exporter_builder_max_concurrent_task",
For: "20s",
Labels: commonLables,
Annotations: map[string]string{"summary": "The build service is performing a maximum number of tasks"},
},
},
@@ -138,36 +137,30 @@ func NewRulesManager(config *option.Config) *AlertingRulesManager {
Name: "WorkerHealth",
Rules: []*RulesConfig{
&RulesConfig{
Alert: "WorkerDown",
Expr: "absent(up{component=\"worker\"})",
For: "10s",
Labels: map[string]string{
"Alert": "Rainbond",
},
Alert: "WorkerDown",
Expr: "absent(up{component=\"worker\"})",
For: "10s",
Labels: commonLables,
Annotations: map[string]string{
"description": "worker node {{ $labels.instance }} is down",
"summary": "worker is down",
},
},
&RulesConfig{
Alert: "WorkerUnhealthy",
Expr: "app_resource_exporter_health_status == 0",
For: "3m",
Labels: map[string]string{
"Alert": "Rainbond",
},
Alert: "WorkerUnhealthy",
Expr: "app_resource_exporter_health_status == 0",
For: "3m",
Labels: commonLables,
Annotations: map[string]string{
"summary": "worker unhealthy",
"description": "worker node {{ $labels.instance }} is unhealthy",
},
},
&RulesConfig{
Alert: "WorkerTaskError",
Expr: "app_resource_exporter_worker_task_error > 50",
For: "3m",
Labels: map[string]string{
"Alert": "Rainbond",
},
Alert: "WorkerTaskError",
Expr: "app_resource_exporter_worker_task_error > 50",
For: "3m",
Labels: commonLables,
Annotations: map[string]string{
"description": "worker node {{ $labels.instance }} execution task error number is greater than 50",
},
@@ -178,33 +171,27 @@ func NewRulesManager(config *option.Config) *AlertingRulesManager {
Name: "MqHealth",
Rules: []*RulesConfig{
&RulesConfig{
Alert: "MqDown",
Expr: "absent(up{component=\"mq\"})",
For: "20s",
Labels: map[string]string{
"Alert": "Rainbond",
},
Alert: "MqDown",
Expr: "absent(up{component=\"mq\"})",
For: "20s",
Labels: commonLables,
Annotations: map[string]string{
"description": "mq node {{ $labels.instance }} is down",
"summary": "mq is down",
},
},
&RulesConfig{
Alert: "MqUnhealthy",
Expr: "acp_mq_exporter_health_status == 0",
For: "3m",
Labels: map[string]string{
"Alert": "Rainbond",
},
Alert: "MqUnhealthy",
Expr: "acp_mq_exporter_health_status == 0",
For: "3m",
Labels: commonLables,
Annotations: map[string]string{"summary": "mq unhealthy"},
},
&RulesConfig{
Alert: "TeamTaskMany",
Expr: "acp_mq_dequeue_number-acp_mq_enqueue_number > 200",
For: "3m",
Labels: map[string]string{
"Alert": "Rainbond",
},
Alert: "TeamTaskMany",
Expr: "acp_mq_dequeue_number-acp_mq_enqueue_number > 200",
For: "3m",
Labels: commonLables,
Annotations: map[string]string{"summary": "The number of tasks in the queue is greater than 200"},
},
},
@@ -213,21 +200,17 @@ func NewRulesManager(config *option.Config) *AlertingRulesManager {
Name: "EventlogHealth",
Rules: []*RulesConfig{
&RulesConfig{
Alert: "EventLogUnhealthy",
Expr: "event_log_exporter_health_status == 0",
For: "3m",
Labels: map[string]string{
"Alert": "Rainbond",
},
Alert: "EventLogUnhealthy",
Expr: "event_log_exporter_health_status == 0",
For: "3m",
Labels: commonLables,
Annotations: map[string]string{"summary": "eventlog unhealthy"},
},
&RulesConfig{
Alert: "EventLogDown",
Expr: "absent(up{component=\"eventlog\"})",
For: "3m",
Labels: map[string]string{
"Alert": "Rainbond",
},
Alert: "EventLogDown",
Expr: "absent(up{component=\"eventlog\"})",
For: "3m",
Labels: commonLables,
Annotations: map[string]string{
"description": "worker node {{ $labels.instance }} is down",
"summary": "eventlog service down",
@@ -239,33 +222,27 @@ func NewRulesManager(config *option.Config) *AlertingRulesManager {
Name: "WebcliHealth",
Rules: []*RulesConfig{
&RulesConfig{
Alert: "WebcliDown",
Expr: "absent(up{component=\"webcli\"})",
For: "20s",
Labels: map[string]string{
"Alert": "Rainbond",
},
Alert: "WebcliDown",
Expr: "absent(up{component=\"webcli\"})",
For: "20s",
Labels: commonLables,
Annotations: map[string]string{
"description": "webcli node {{ $labels.instance }} is down",
"summary": "webcli is down",
},
},
&RulesConfig{
Alert: "WebcliUnhealthy",
Expr: "webcli_exporter_health_status == 0",
For: "3m",
Labels: map[string]string{
"Alert": "Rainbond",
},
Alert: "WebcliUnhealthy",
Expr: "webcli_exporter_health_status == 0",
For: "3m",
Labels: commonLables,
Annotations: map[string]string{"summary": "webcli unhealthy"},
},
&RulesConfig{
Alert: "WebcliUnhealthy",
Expr: "rate(webcli_exporter_execute_command_failed[5m]) > 5",
For: "3m",
Labels: map[string]string{
"Alert": "Rainbond",
},
Alert: "WebcliUnhealthy",
Expr: "rate(webcli_exporter_execute_command_failed[5m]) > 5",
For: "3m",
Labels: commonLables,
Annotations: map[string]string{"summary": "The number of errors that occurred while executing the command was greater than 5 per-second."},
},
},
@@ -274,12 +251,10 @@ func NewRulesManager(config *option.Config) *AlertingRulesManager {
Name: "NodeHealth",
Rules: []*RulesConfig{
&RulesConfig{
Alert: "NodeDown",
Expr: "absent(up{component=\"rbd_node\"})",
For: "30s",
Labels: map[string]string{
"Alert": "Rainbond",
},
Alert: "NodeDown",
Expr: "absent(up{component=\"rbd_node\"})",
For: "30s",
Labels: commonLables,
Annotations: map[string]string{
"description": "node {{ $labels.instance }} is down",
"summary": "rbd_node is down",
@@ -289,42 +264,42 @@ func NewRulesManager(config *option.Config) *AlertingRulesManager {
Alert: "high_cpu_usage_on_node",
Expr: "sum by(instance) (rate(process_cpu_seconds_total[5m])) * 100 > 70",
For: "5m",
Labels: map[string]string{"Alert": "Rainbond"},
Labels: commonLables,
Annotations: map[string]string{"description": "{{ $labels.instance }} is using a LOT of CPU. CPU usage is {{ humanize $value}}%.", "summary": "HIGH CPU USAGE WARNING ON '{{ $labels.instance }}'"},
},
&RulesConfig{
Alert: "high_la_usage_on_node",
Expr: "count by (instance) (node_load5) > count by(instance)(count by(job, instance, cpu)(node_cpu))",
For: "5m",
Labels: map[string]string{"Alert": "Rainbond"},
Labels: commonLables,
Annotations: map[string]string{"description": "{{ $labels.instance }} has a high load average. Load Average 5m is {{ humanize $value}}.", "summary": "HIGH LOAD AVERAGE WARNING ON '{{ $labels.instance }}'"},
},
&RulesConfig{
Alert: "inode_freerate_low",
Expr: "node_filesystem_files_free{fstype=~\"ext4|xfs\"} / node_filesystem_files{fstype=~\"ext4|xfs\"} < 0.3",
For: "5m",
Labels: map[string]string{"service": "node_filesystem_files_free"},
Labels: commonLables,
Annotations: map[string]string{"description": "the inode free rate is low of node {{ $labels.instance }}, current value is {{ humanize $value}}."},
},
&RulesConfig{
Alert: "high_rootdisk_usage_on_node",
Expr: "(node_filesystem_size{mountpoint='/'} - node_filesystem_free{mountpoint='/'}) * 100 / node_filesystem_size{mountpoint='/'} > 75",
For: "5m",
Labels: map[string]string{"Alert": "Rainbond"},
Labels: commonLables,
Annotations: map[string]string{"description": "More than 75% of disk used. Disk usage {{ humanize $value }} mountpoint {{ $labels.mountpoint }}%.", "summary": "LOW DISK SPACE WARING:NODE '{{ $labels.instance }}"},
},
&RulesConfig{
Alert: "high_dockerdisk_usage_on_node",
Expr: "(node_filesystem_size{mountpoint='/var/lib/docker'} - node_filesystem_free{mountpoint='/var/lib/docker'}) * 100 / node_filesystem_size{mountpoint='/var/lib/docker'} > 75",
For: "5m",
Labels: map[string]string{"Alert": "Rainbond"},
Labels: commonLables,
Annotations: map[string]string{"description": "More than 75% of disk used. Disk usage {{ humanize $value }} mountpoint {{ $labels.mountpoint }}%.", "summary": "LOW DISK SPACE WARING:NODE '{{ $labels.instance }}"},
},
&RulesConfig{
Alert: "high_memory_usage_on_node",
Expr: "((node_memory_MemTotal - node_memory_MemAvailable) / node_memory_MemTotal) * 100 > 80",
For: "5m",
Labels: map[string]string{"Alert": "Rainbond"},
Labels: commonLables,
Annotations: map[string]string{"description": "{{ $labels.instance }} is using a LOT of MEMORY. MEMORY usage is over {{ humanize $value}}%.", "summary": "HIGH MEMORY USAGE WARNING TASK ON '{{ $labels.instance }}'"},
},
},
@@ -336,21 +311,21 @@ func NewRulesManager(config *option.Config) *AlertingRulesManager {
Alert: "cluster_node_unhealth",
Expr: "rainbond_cluster_node_health != 0",
For: "3m",
Labels: map[string]string{"Alert": "Rainbond"},
Labels: commonLables,
Annotations: map[string]string{"description": "cluster node {{ $labels.node_ip }} is unhealth"},
},
&RulesConfig{
Alert: "cluster_kube_node_unhealth",
Expr: "rainbond_cluster_component_health{component=\"KubeNodeReady\"} != 0",
For: "3m",
Labels: map[string]string{"Alert": "Rainbond"},
Labels: commonLables,
Annotations: map[string]string{"description": "kubernetes cluster node {{ $labels.node_ip }} is unhealth"},
},
&RulesConfig{
Alert: "rainbond_cluster_collector_duration_seconds_timeout",
Expr: "rainbond_cluster_collector_duration_seconds > 10",
For: "3m",
Labels: map[string]string{"Alert": "Rainbond"},
Labels: commonLables,
Annotations: map[string]string{"description": "Cluster collector '{{ $labels.instance }}' more than 10s"},
},
},
@@ -359,128 +334,100 @@ func NewRulesManager(config *option.Config) *AlertingRulesManager {
Name: "EtcdHealth",
Rules: []*RulesConfig{
&RulesConfig{
Alert: "EtcdDown",
Expr: "absent(up{component=\"etcd\"})",
For: "1m",
Labels: map[string]string{
"Alert": "Rainbond",
},
Alert: "EtcdDown",
Expr: "absent(up{component=\"etcd\"})",
For: "1m",
Labels: commonLables,
Annotations: map[string]string{
"description": "etcd node {{ $labels.instance }} is down, ",
"summary": "etcd node is down",
},
},
&RulesConfig{
Alert: "EtcdLoseLeader",
Expr: "etcd_server_has_leader == 0",
For: "1m",
Labels: map[string]string{
"Alert": "Rainbond",
},
Alert: "EtcdLoseLeader",
Expr: "etcd_server_has_leader == 0",
For: "1m",
Labels: commonLables,
Annotations: map[string]string{
"description": "etcd node {{ $labels.instance }} is lose leader",
"summary": "etcd lose leader",
},
},
&RulesConfig{
Alert: "InsufficientMembers",
Expr: "count(up{job=\"etcd\"} == 0) > (count(up{job=\"etcd\"}) / 2 - 1)",
For: "1m",
Labels: map[string]string{
"severity": "critical",
"Alert": "Rainbond",
},
Alert: "InsufficientMembers",
Expr: "count(up{job=\"etcd\"} == 0) > (count(up{job=\"etcd\"}) / 2 - 1)",
For: "1m",
Labels: getseverityLables("critical"),
Annotations: map[string]string{
"description": "If one more etcd member goes down the cluster will be unavailable",
"summary": "etcd cluster insufficient members",
},
},
&RulesConfig{
Alert: "HighNumberOfLeaderChanges",
Expr: "increase(etcd_server_leader_changes_seen_total{job=\"etcd\"}[1h]) > 3",
For: "1m",
Labels: map[string]string{
"severity": "warning",
"Alert": "Rainbond",
},
Alert: "HighNumberOfLeaderChanges",
Expr: "increase(etcd_server_leader_changes_seen_total{job=\"etcd\"}[1h]) > 3",
For: "1m",
Labels: getseverityLables("warning"),
Annotations: map[string]string{
"description": "etcd instance {{ $labels.instance }} has seen {{ $value }} leader changes within the last hour",
"summary": "a high number of leader changes within the etcd cluster are happening",
},
},
&RulesConfig{
Alert: "HighNumberOfFailedGRPCRequests",
Expr: "sum(rate(etcd_grpc_requests_failed_total{job=\"etcd\"}[5m])) BY (grpc_method) / sum(rate(etcd_grpc_total{job=\"etcd\"}[5m])) BY (grpc_method) > 0.05",
For: "5m",
Labels: map[string]string{
"severity": "critical",
"Alert": "Rainbond",
},
Alert: "HighNumberOfFailedGRPCRequests",
Expr: "sum(rate(etcd_grpc_requests_failed_total{job=\"etcd\"}[5m])) BY (grpc_method) / sum(rate(etcd_grpc_total{job=\"etcd\"}[5m])) BY (grpc_method) > 0.05",
For: "5m",
Labels: getseverityLables("critical"),
Annotations: map[string]string{
"description": "{{ $value }}% of requests for {{ $labels.grpc_method }} failed on etcd instance {{ $labels.instance }}",
"summary": "a high number of gRPC requests are failing",
},
},
&RulesConfig{
Alert: "HighNumberOfFailedHTTPRequests",
Expr: "sum(rate(etcd_http_failed_total{job=\"etcd\"}[5m])) BY (method) / sum(rate(etcd_http_received_total{job=\"etcd\"}[5m]))BY (method) > 0.05",
For: "1m",
Labels: map[string]string{
"severity": "critical",
"Alert": "Rainbond",
},
Alert: "HighNumberOfFailedHTTPRequests",
Expr: "sum(rate(etcd_http_failed_total{job=\"etcd\"}[5m])) BY (method) / sum(rate(etcd_http_received_total{job=\"etcd\"}[5m]))BY (method) > 0.05",
For: "1m",
Labels: getseverityLables("critical"),
Annotations: map[string]string{
"description": "{{ $value }}% of requests for {{ $labels.method }} failed on etcd instance {{ $labels.instance }}",
"summary": "a high number of HTTP requests are failing",
},
},
&RulesConfig{
Alert: "GRPCRequestsSlow",
Expr: "histogram_quantile(0.99, rate(etcd_grpc_unary_requests_duration_seconds_bucket[5m])) > 0.15",
For: "1m",
Labels: map[string]string{
"severity": "critical",
"Alert": "Rainbond",
},
Alert: "GRPCRequestsSlow",
Expr: "histogram_quantile(0.99, rate(etcd_grpc_unary_requests_duration_seconds_bucket[5m])) > 0.15",
For: "1m",
Labels: getseverityLables("critical"),
Annotations: map[string]string{
"description": "on etcd instance {{ $labels.instance }} gRPC requests to {{ $labels.grpc_method}} are slow",
"summary": "slow gRPC requests",
},
},
&RulesConfig{
Alert: "HighNumberOfFailedHTTPRequests",
Expr: "sum(rate(etcd_http_failed_total{job=\"etcd\"}[5m])) BY (method) / sum(rate(etcd_http_received_total{job=\"etcd\"}[5m]))BY (method) > 0.05",
For: "1m",
Labels: map[string]string{
"severity": "critical",
"Alert": "Rainbond",
},
Alert: "HighNumberOfFailedHTTPRequests",
Expr: "sum(rate(etcd_http_failed_total{job=\"etcd\"}[5m])) BY (method) / sum(rate(etcd_http_received_total{job=\"etcd\"}[5m]))BY (method) > 0.05",
For: "1m",
Labels: getseverityLables("critical"),
Annotations: map[string]string{
"description": "{{ $value }}% of requests for {{ $labels.method }} failed on etcd instance {{ $labels.instance }}",
"summary": "a high number of HTTP requests are failing",
},
},
&RulesConfig{
Alert: "HighNumberOfFailedHTTPRequests",
Expr: "sum(rate(etcd_http_failed_total{job=\"etcd\"}[5m])) BY (method) / sum(rate(etcd_http_received_total{job=\"etcd\"}[5m]))BY (method) > 0.05",
For: "1m",
Labels: map[string]string{
"severity": "critical",
"Alert": "Rainbond",
},
Alert: "HighNumberOfFailedHTTPRequests",
Expr: "sum(rate(etcd_http_failed_total{job=\"etcd\"}[5m])) BY (method) / sum(rate(etcd_http_received_total{job=\"etcd\"}[5m]))BY (method) > 0.05",
For: "1m",
Labels: getseverityLables("critical"),
Annotations: map[string]string{
"description": "{{ $value }}% of requests for {{ $labels.method }} failed on etcd instance {{ $labels.instance }}",
"summary": "a high number of HTTP requests are failing",
},
},
&RulesConfig{
Alert: "DatabaseSpaceExceeded",
Expr: "etcd_mvcc_db_total_size_in_bytes/etcd_server_quota_backend_bytes > 0.80",
For: "1m",
Labels: map[string]string{
"severity": "critical",
"Alert": "Rainbond",
},
Alert: "DatabaseSpaceExceeded",
Expr: "etcd_mvcc_db_total_size_in_bytes/etcd_server_quota_backend_bytes > 0.80",
For: "1m",
Labels: getseverityLables("critical"),
Annotations: map[string]string{
"description": "{{ $labels.instance }}, {{ $labels.job }} of etcd DB space uses more than 80%",
"summary": "Etcd DB space is overused",