delete KubeNodeHealth alert

This commit is contained in:
GLYASAI 2021-05-24 11:10:37 +08:00
parent b5f76479f2
commit 252ad830e1
9 changed files with 22 additions and 212 deletions

View File

@ -92,7 +92,7 @@ func ParseClientCommnad(args []string) {
App := cli.NewApp()
App.Version = "0.1"
App.Commands = []cli.Command{
cli.Command{
{
Name: "upgrade",
Flags: []cli.Flag{
cli.StringFlag{

View File

@ -21,28 +21,27 @@ package server
import (
"context"
"fmt"
"github.com/goodrain/rainbond/discover.v2"
"github.com/goodrain/rainbond/node/initiate"
"github.com/goodrain/rainbond/util/constants"
"k8s.io/client-go/kubernetes"
"os"
"os/signal"
"syscall"
"github.com/goodrain/rainbond/cmd/node/option"
"github.com/goodrain/rainbond/discover.v2"
eventLog "github.com/goodrain/rainbond/event"
"github.com/goodrain/rainbond/node/api"
"github.com/goodrain/rainbond/node/api/controller"
"github.com/goodrain/rainbond/node/core/store"
"github.com/goodrain/rainbond/node/initiate"
"github.com/goodrain/rainbond/node/kubecache"
"github.com/goodrain/rainbond/node/masterserver"
"github.com/goodrain/rainbond/node/nodem"
"github.com/goodrain/rainbond/node/nodem/docker"
"github.com/goodrain/rainbond/node/nodem/envoy"
"github.com/goodrain/rainbond/util/constants"
etcdutil "github.com/goodrain/rainbond/util/etcd"
k8sutil "github.com/goodrain/rainbond/util/k8s"
"github.com/sirupsen/logrus"
"k8s.io/client-go/kubernetes"
)
//Run start run

View File

@ -310,27 +310,6 @@ func NewRulesManager(config *option.Config) *AlertingRulesManager {
&AlertingNameConfig{
Name: "ClusterHealth",
Rules: []*RulesConfig{
&RulesConfig{
Alert: "RbdNodeUnhealth",
Expr: "rainbond_cluster_node_health != 0",
For: "3m",
Labels: commonLables,
Annotations: map[string]string{"description": "cluster node {{ $labels.node_ip }} is unhealth"},
},
&RulesConfig{
Alert: "KubeNodeUnhealth",
Expr: "rainbond_cluster_component_health{component=\"KubeNodeReady\"} != 0",
For: "3m",
Labels: commonLables,
Annotations: map[string]string{"description": "kubernetes cluster node {{ $labels.node_ip }} is unhealth"},
},
&RulesConfig{
Alert: "ClusterCollectorTimeout",
Expr: "rainbond_cluster_collector_duration_seconds > 10",
For: "3m",
Labels: commonLables,
Annotations: map[string]string{"description": "Cluster collector '{{ $labels.instance }}' more than 10s"},
},
&RulesConfig{
Alert: "InsufficientClusteMemoryResources",
Expr: "max(rbd_api_exporter_cluster_memory_total) - max(sum(namespace_resource_memory_request) by (instance)) < 2048",

View File

@ -38,7 +38,7 @@ var kubecli kubecache.KubeClient
//Init initializes the package-level services
func Init(c *option.Conf, ms *masterserver.MasterServer, kube kubecache.KubeClient) {
if ms != nil {
prometheusService = service.CreatePrometheusService(c, ms)
prometheusService = service.CreatePrometheusService(c)
datacenterConfig = config.GetDataCenterConfig()
nodeService = service.CreateNodeService(c, ms.Cluster, kube)
}

View File

@ -19,33 +19,29 @@
package api
import (
"context"
"fmt"
"net/http"
"strconv"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promhttp"
"github.com/goodrain/rainbond/discover"
"github.com/goodrain/rainbond/node/kubecache"
"github.com/goodrain/rainbond/node/masterserver"
"github.com/goodrain/rainbond/node/statsd"
"github.com/goodrain/rainbond/node/api/controller"
"github.com/goodrain/rainbond/node/api/router"
"context"
"strings"
"github.com/goodrain/rainbond/cmd/node/option"
nodeclient "github.com/goodrain/rainbond/node/nodem/client"
_ "net/http/pprof"
client "github.com/coreos/etcd/clientv3"
"github.com/go-chi/chi"
"github.com/goodrain/rainbond/cmd/node/option"
"github.com/goodrain/rainbond/discover"
"github.com/goodrain/rainbond/node/api/controller"
"github.com/goodrain/rainbond/node/api/router"
"github.com/goodrain/rainbond/node/kubecache"
"github.com/goodrain/rainbond/node/masterserver"
nodeclient "github.com/goodrain/rainbond/node/nodem/client"
"github.com/goodrain/rainbond/node/statsd"
etcdutil "github.com/goodrain/rainbond/util/etcd"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promhttp"
"github.com/sirupsen/logrus"
// pprof
_ "net/http/pprof"
)
//Manager api manager
@ -146,9 +142,6 @@ func (m *Manager) HandleClusterScrape(w http.ResponseWriter, r *http.Request) {
gatherers := prometheus.Gatherers{
prometheus.DefaultGatherer,
}
if m.ms != nil {
gatherers = append(gatherers, m.ms.GetRegistry())
}
// Delegate http serving to Prometheus client library, which will call collector.Collect.
h := promhttp.HandlerFor(gatherers,
promhttp.HandlerOpts{

View File

@ -21,7 +21,6 @@ package service
import (
"github.com/goodrain/rainbond/cmd/node/option"
"github.com/goodrain/rainbond/node/api/model"
"github.com/goodrain/rainbond/node/masterserver"
"github.com/goodrain/rainbond/node/utils"
)
@ -29,18 +28,16 @@ import (
// PrometheusService bundles the values that CreatePrometheusService stores
// when it builds the package-level singleton.
type PrometheusService struct {
// prometheusAPI is built by CreatePrometheusService from c.PrometheusAPI.
prometheusAPI *model.PrometheusAPI
// conf is the node configuration handed to CreatePrometheusService.
conf *option.Conf
// ms is the master server handed to CreatePrometheusService.
// NOTE(review): no use of this field is visible here — confirm it is still needed.
ms *masterserver.MasterServer
}
var prometheusService *PrometheusService
//CreatePrometheusService create prometheus service
func CreatePrometheusService(c *option.Conf, ms *masterserver.MasterServer) *PrometheusService {
func CreatePrometheusService(c *option.Conf) *PrometheusService {
if prometheusService == nil {
prometheusService = &PrometheusService{
prometheusAPI: &model.PrometheusAPI{API: c.PrometheusAPI},
conf: c,
ms: ms,
}
}
return prometheusService

View File

@ -1,59 +0,0 @@
// RAINBOND, Application Management Platform
// Copyright (C) 2014-2017 Goodrain Co., Ltd.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version. For any non-GPL usage of Rainbond,
// one or multiple Commercial Licenses authorized by Goodrain Co., Ltd.
// must be obtained first.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
package monitor
import (
"github.com/goodrain/rainbond/node/masterserver/node"
"github.com/prometheus/client_golang/prometheus"
)
// Manager is the lifecycle contract of the cluster monitor: it can be
// started, stopped, and asked for the Prometheus registry it owns.
type Manager interface {
// Start starts the monitor and returns an error if it cannot be started.
// NOTE(review): errchan is presumably meant for reporting asynchronous
// errors; the implementation in this file never writes to it.
Start(errchan chan error) error
// Stop stops the monitor.
Stop() error
// GetRegistry returns the Prometheus registry held by the monitor.
GetRegistry() *prometheus.Registry
}
// manager is the Manager implementation of this package. It pairs a
// Prometheus registry with the cluster that Start registers on it.
type manager struct {
// clusterExporterRestry is the registry created in CreateManager and
// returned by GetRegistry.
clusterExporterRestry *prometheus.Registry
// cluster is the value passed to Register on clusterExporterRestry by Start.
cluster *node.Cluster
}
// CreateManager returns a Manager that owns a freshly created Prometheus
// registry and remembers cluster so that Start can register it later.
// The returned error is always nil.
func CreateManager(cluster *node.Cluster) (Manager, error) {
	return &manager{
		clusterExporterRestry: prometheus.NewRegistry(),
		cluster:               cluster,
	}, nil
}
// Start registers the cluster on the manager's registry and reports the
// registration error, if any. errchan is accepted to satisfy Manager but is
// not used.
func (m *manager) Start(errchan chan error) error {
	if err := m.clusterExporterRestry.Register(m.cluster); err != nil {
		return err
	}
	return nil
}
// Stop satisfies Manager. It performs no work and always returns nil; in
// particular it does not unregister the cluster from the registry.
func (m *manager) Stop() error {
return nil
}
// GetRegistry returns the registry created by CreateManager, i.e. the one
// Start registers the cluster on.
func (m *manager) GetRegistry() *prometheus.Registry {
return m.clusterExporterRestry
}

View File

@ -1,80 +0,0 @@
// RAINBOND, Application Management Platform
// Copyright (C) 2014-2019 Goodrain Co., Ltd.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version. For any non-GPL usage of Rainbond,
// one or multiple Commercial Licenses authorized by Goodrain Co., Ltd.
// must be obtained first.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
package node
import (
"strconv"
"time"
"github.com/goodrain/rainbond/node/nodem/client"
"github.com/prometheus/client_golang/prometheus"
)
// Metric descriptors used by Cluster.Collect and Cluster.Describe. All three
// names are built with BuildFQName from namespace and the "cluster" subsystem.
var (
// namespace is the first name component of every metric declared below.
namespace = "rainbond"
// scrapeDurationDesc describes the collector_duration_seconds metric; it
// declares no variable labels.
scrapeDurationDesc = prometheus.NewDesc(
prometheus.BuildFQName(namespace, "cluster", "collector_duration_seconds"),
"cluster_exporter: Duration of a collector scrape.",
[]string{},
nil,
)
// nodeStatus describes the node_health metric, labelled with the node's
// id and ip plus its status and healthy flag.
nodeStatus = prometheus.NewDesc(
prometheus.BuildFQName(namespace, "cluster", "node_health"),
"node_health: Rainbond node health status.",
[]string{"node_id", "node_ip", "status", "healthy"},
nil,
)
// componentStatus describes the component_health metric, labelled with
// the node's id and ip plus the component name.
componentStatus = prometheus.NewDesc(
prometheus.BuildFQName(namespace, "cluster", "component_health"),
"component_health: Rainbond node component health status.",
[]string{"node_id", "node_ip", "component"},
nil,
)
)
// Collect implements the Collect half of a Prometheus collector.
//
// For every node returned by GetAllNode it sends one node_health gauge whose
// value is 0 when the node status is client.Running and NodeHealth is true,
// and 1 otherwise. For every condition of that node it sends one
// component_health gauge whose value is 0 when the condition status is
// client.ConditionTrue, and 1 otherwise. Finally it sends the time spent in
// this call, in seconds, as the collector duration gauge.
func (n *Cluster) Collect(ch chan<- prometheus.Metric) {
	startedAt := time.Now()

	for _, hostNode := range n.GetAllNode() {
		// Gauge convention used here: 0 = healthy, 1 = unhealthy.
		nodeValue := 1.0
		if hostNode.Status == client.Running && hostNode.NodeStatus.NodeHealth {
			nodeValue = 0
		}
		ch <- prometheus.MustNewConstMetric(
			nodeStatus,
			prometheus.GaugeValue,
			nodeValue,
			hostNode.ID,
			hostNode.InternalIP,
			hostNode.Status,
			strconv.FormatBool(hostNode.NodeStatus.NodeHealth),
		)

		for _, cond := range hostNode.NodeStatus.Conditions {
			condValue := 1.0
			if cond.Status == client.ConditionTrue {
				condValue = 0
			}
			ch <- prometheus.MustNewConstMetric(
				componentStatus,
				prometheus.GaugeValue,
				condValue,
				hostNode.ID,
				hostNode.InternalIP,
				string(cond.Type),
			)
		}
	}

	elapsed := time.Since(startedAt).Seconds()
	ch <- prometheus.MustNewConstMetric(scrapeDurationDesc, prometheus.GaugeValue, elapsed)
}
// Describe implements the Describe half of a Prometheus collector. It sends
// the descriptors of the metrics emitted by Collect, in the order scrape
// duration, node health, component health.
func (n *Cluster) Describe(ch chan<- *prometheus.Desc) {
	descs := []*prometheus.Desc{
		scrapeDurationDesc,
		nodeStatus,
		componentStatus,
	}
	for _, desc := range descs {
		ch <- desc
	}
}

View File

@ -21,10 +21,6 @@ package masterserver
import (
"context"
"github.com/prometheus/client_golang/prometheus"
"github.com/goodrain/rainbond/node/masterserver/monitor"
"github.com/sirupsen/logrus"
"github.com/goodrain/rainbond/node/kubecache"
@ -43,7 +39,6 @@ type MasterServer struct {
ctx context.Context
cancel context.CancelFunc
datacenterConfig *config.DataCenterConfig
clusterMonitor monitor.Manager
}
//NewMasterServer creates the master node server
@ -51,11 +46,6 @@ func NewMasterServer(modelnode *client.HostNode, kubecli kubecache.KubeClient) (
datacenterConfig := config.GetDataCenterConfig()
ctx, cancel := context.WithCancel(context.Background())
nodecluster := node.CreateCluster(kubecli, modelnode, datacenterConfig)
clusterMonitor, err := monitor.CreateManager(nodecluster)
if err != nil {
cancel()
return nil, err
}
ms := &MasterServer{
Client: store.DefalutClient,
HostNode: modelnode,
@ -63,7 +53,6 @@ func NewMasterServer(modelnode *client.HostNode, kubecli kubecache.KubeClient) (
ctx: ctx,
cancel: cancel,
datacenterConfig: datacenterConfig,
clusterMonitor: clusterMonitor,
}
return ms, nil
}
@ -75,7 +64,7 @@ func (m *MasterServer) Start(errchan chan error) error {
logrus.Error("node cluster start error,", err.Error())
return err
}
return m.clusterMonitor.Start(errchan)
return nil
}
//Stop stops the master server
@ -83,13 +72,5 @@ func (m *MasterServer) Stop(i interface{}) {
if m.Cluster != nil {
m.Cluster.Stop(i)
}
if m.clusterMonitor != nil {
m.clusterMonitor.Stop()
}
m.cancel()
}
// GetRegistry returns the Prometheus registry of the cluster monitor by
// delegating to m.clusterMonitor.GetRegistry.
// NOTE(review): unlike Stop, this does not check m.clusterMonitor for nil
// before calling it — confirm callers only use it on a fully built server.
func (m *MasterServer) GetRegistry() *prometheus.Registry {
return m.clusterMonitor.GetRegistry()
}