mirror of
https://gitee.com/rainbond/Rainbond.git
synced 2024-12-02 03:37:46 +08:00
change code
This commit is contained in:
parent
430880284f
commit
75c98f7834
@ -188,7 +188,7 @@ func (s *SelectBalance) Select(r *http.Request, endpoints EndpointList) Endpoint
|
|||||||
}
|
}
|
||||||
|
|
||||||
if len(endpoints) > 0 {
|
if len(endpoints) > 0 {
|
||||||
return endpoints[0]
|
return endpoints[len(endpoints)-1]
|
||||||
}
|
}
|
||||||
|
|
||||||
return Endpoint(s.hostIDMap["local"])
|
return Endpoint(s.hostIDMap["local"])
|
||||||
|
@ -40,6 +40,25 @@ func NewRulesManager(config *option.Config) *AlertingRulesManager {
|
|||||||
a := &AlertingRulesManager{
|
a := &AlertingRulesManager{
|
||||||
RulesConfig: &AlertingRulesConfig{
|
RulesConfig: &AlertingRulesConfig{
|
||||||
Groups: []*AlertingNameConfig{
|
Groups: []*AlertingNameConfig{
|
||||||
|
&AlertingNameConfig{
|
||||||
|
Name: "GatewayHealth",
|
||||||
|
Rules: []*RulesConfig{
|
||||||
|
&RulesConfig{
|
||||||
|
Alert: "RequestMany",
|
||||||
|
Expr: "rate(gateway_requests[5m]) > 100",
|
||||||
|
For: "10s",
|
||||||
|
Labels: map[string]string{},
|
||||||
|
Annotations: map[string]string{"description": "http doamin {{ $labels.host }} per-second requests more than 100"},
|
||||||
|
},
|
||||||
|
&RulesConfig{
|
||||||
|
Alert: "FailureRequestMany",
|
||||||
|
Expr: "rate(gateway_requests{status=~\"5..\"}[5m]) > 5",
|
||||||
|
For: "10s",
|
||||||
|
Labels: map[string]string{},
|
||||||
|
Annotations: map[string]string{"description": "http doamin {{ $labels.host }} per-second failure requests more than 5"},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
&AlertingNameConfig{
|
&AlertingNameConfig{
|
||||||
|
|
||||||
Name: "BuilderHealth",
|
Name: "BuilderHealth",
|
||||||
@ -49,7 +68,7 @@ func NewRulesManager(config *option.Config) *AlertingRulesManager {
|
|||||||
Expr: "builder_exporter_health_status == 0",
|
Expr: "builder_exporter_health_status == 0",
|
||||||
For: "3m",
|
For: "3m",
|
||||||
Labels: map[string]string{},
|
Labels: map[string]string{},
|
||||||
Annotations: map[string]string{"summary": "builder unhealthy"},
|
Annotations: map[string]string{"description": "builder unhealthy"},
|
||||||
},
|
},
|
||||||
&RulesConfig{
|
&RulesConfig{
|
||||||
Alert: "BuilderTaskError",
|
Alert: "BuilderTaskError",
|
||||||
@ -133,10 +152,10 @@ func NewRulesManager(config *option.Config) *AlertingRulesManager {
|
|||||||
},
|
},
|
||||||
&RulesConfig{
|
&RulesConfig{
|
||||||
Alert: "WebcliUnhealthy",
|
Alert: "WebcliUnhealthy",
|
||||||
Expr: "webcli_exporter_execute_command_failed > 100",
|
Expr: "rate(webcli_exporter_execute_command_failed[5m]) > 5",
|
||||||
For: "3m",
|
For: "3m",
|
||||||
Labels: map[string]string{},
|
Labels: map[string]string{},
|
||||||
Annotations: map[string]string{"summary": "The number of errors that occurred while executing the command was greater than 100."},
|
Annotations: map[string]string{"summary": "The number of errors that occurred while executing the command was greater than 5 per-second."},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
@ -159,18 +178,18 @@ func NewRulesManager(config *option.Config) *AlertingRulesManager {
|
|||||||
Annotations: map[string]string{"description": "{{ $labels.instance }} has a high load average. Load Average 5m is {{ humanize $value}}.", "summary": "HIGH LOAD AVERAGE WARNING ON '{{ $labels.instance }}'"},
|
Annotations: map[string]string{"description": "{{ $labels.instance }} has a high load average. Load Average 5m is {{ humanize $value}}.", "summary": "HIGH LOAD AVERAGE WARNING ON '{{ $labels.instance }}'"},
|
||||||
},
|
},
|
||||||
&RulesConfig{
|
&RulesConfig{
|
||||||
Alert: "node_running_out_of_disk_space",
|
Alert: "high_rootdisk_usage_on_node",
|
||||||
Expr: "(node_filesystem_size{mountpoint='/'} - node_filesystem_free{mountpoint='/'}) * 100 / node_filesystem_size{mountpoint='/'} > 80",
|
Expr: "(node_filesystem_size{mountpoint='/'} - node_filesystem_free{mountpoint='/'}) * 100 / node_filesystem_size{mountpoint='/'} > 75",
|
||||||
For: "5m",
|
For: "5m",
|
||||||
Labels: map[string]string{"service": "node_running_out_of_disk_space"},
|
Labels: map[string]string{"service": "high_rootdisk_usage_on_node"},
|
||||||
Annotations: map[string]string{"description": "More than 80% of disk used. Disk usage {{ humanize $value }}%.", "summary": "LOW DISK SPACE WARING:NODE '{{ $labels.instance }}"},
|
Annotations: map[string]string{"description": "More than 75% of disk used. Disk usage {{ humanize $value }} mountpoint {{ $labels.mountpoint }}%.", "summary": "LOW DISK SPACE WARING:NODE '{{ $labels.instance }}"},
|
||||||
},
|
},
|
||||||
&RulesConfig{
|
&RulesConfig{
|
||||||
Alert: "monitoring_service_down",
|
Alert: "high_dockerdisk_usage_on_node",
|
||||||
Expr: "up == 0",
|
Expr: "(node_filesystem_size{mountpoint='/var/lib/docker'} - node_filesystem_free{mountpoint='/var/lib/docker'}) * 100 / node_filesystem_size{mountpoint='/var/lib/docker'} > 75",
|
||||||
For: "5m",
|
For: "5m",
|
||||||
Labels: map[string]string{"service": "service_down"},
|
Labels: map[string]string{"service": "high_dockerdisk_usage_on_node"},
|
||||||
Annotations: map[string]string{"description": "The monitoring service '{{ $labels.job }}' is down.", "summary": "MONITORING SERVICE DOWN WARNING:NODE '{{ $labels.instance }}'"},
|
Annotations: map[string]string{"description": "More than 75% of disk used. Disk usage {{ humanize $value }} mountpoint {{ $labels.mountpoint }}%.", "summary": "LOW DISK SPACE WARING:NODE '{{ $labels.instance }}"},
|
||||||
},
|
},
|
||||||
&RulesConfig{
|
&RulesConfig{
|
||||||
Alert: "high_memory_usage_on_node",
|
Alert: "high_memory_usage_on_node",
|
||||||
@ -186,25 +205,25 @@ func NewRulesManager(config *option.Config) *AlertingRulesManager {
|
|||||||
Name: "ClusterHealth",
|
Name: "ClusterHealth",
|
||||||
Rules: []*RulesConfig{
|
Rules: []*RulesConfig{
|
||||||
&RulesConfig{
|
&RulesConfig{
|
||||||
Alert: "cluster_unhealth",
|
Alert: "cluster_node_unhealth",
|
||||||
Expr: "rainbond_cluster_node_health != 0",
|
Expr: "rainbond_cluster_node_health != 0",
|
||||||
For: "3m",
|
For: "3m",
|
||||||
Labels: map[string]string{"service": "cluster_health"},
|
Labels: map[string]string{"service": "cluster_node_unhealth"},
|
||||||
Annotations: map[string]string{"summary": "!!!Dangerous, the current cluster is in an unhealthy state."},
|
Annotations: map[string]string{"description": "cluster node {{ $labels.node_ip }} is unhealth"},
|
||||||
},
|
},
|
||||||
&RulesConfig{
|
&RulesConfig{
|
||||||
Alert: "monitoring_component_status_unhealth",
|
Alert: "cluster_kube_node_unhealth",
|
||||||
Expr: "rainbond_cluster_component_health != 0",
|
Expr: "rainbond_cluster_component_health{component=\"KubeNodeReady\"} != 0",
|
||||||
For: "3m",
|
For: "3m",
|
||||||
Labels: map[string]string{"service": "component_unhealth"},
|
Labels: map[string]string{"service": "component_unhealth"},
|
||||||
Annotations: map[string]string{"description": "The monitoring component '{{ $labels.component }}' is down.", "summary": "MONITORING COMPONENT UNHEALTHY WARNING:NODE '{{ $labels.node_ip }}'"},
|
Annotations: map[string]string{"description": "kubernetes cluster node {{ $labels.node_ip }} is unhealth"},
|
||||||
},
|
},
|
||||||
&RulesConfig{
|
&RulesConfig{
|
||||||
Alert: "rainbond_cluster_collector_duration_seconds_timeout",
|
Alert: "rainbond_cluster_collector_duration_seconds_timeout",
|
||||||
Expr: "rainbond_cluster_collector_duration_seconds > 10",
|
Expr: "rainbond_cluster_collector_duration_seconds > 10",
|
||||||
For: "3m",
|
For: "3m",
|
||||||
Labels: map[string]string{"service": "cluster_collector"},
|
Labels: map[string]string{"service": "cluster_collector"},
|
||||||
Annotations: map[string]string{"summary": "Cluster collector '{{ $labels.instance }}' more than 10s"},
|
Annotations: map[string]string{"description": "Cluster collector '{{ $labels.instance }}' more than 10s"},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
Loading…
Reference in New Issue
Block a user