[ADD] add entrance health check

This commit is contained in:
zhoujunhao 2018-07-11 14:51:51 +08:00
parent 9e12157285
commit 018bb5bbcb
8 changed files with 70 additions and 36 deletions

View File

@ -51,7 +51,7 @@ type TaskManager struct {
func NewTaskManager(c option.Config, exec exector.Manager) *TaskManager {
ctx, cancel := context.WithCancel(context.Background())
healthStatus["status"] = "health"
healthStatus["info"] = "service health"
healthStatus["info"] = "builder service health"
return &TaskManager{
ctx: ctx,
cancel: cancel,
@ -78,8 +78,6 @@ func (t *TaskManager) Start() error {
//Do runs the task receiving loop until the manager's context is done
func (t *TaskManager) Do() {
hostName, _ := os.Hostname()
timeoutNum := 0
errorNum := 0
for {
select {
case <-t.ctx.Done():
@ -101,20 +99,10 @@ func (t *TaskManager) Do() {
}
if grpc1.ErrorDesc(err) == "context timeout" {
logrus.Warn(err.Error())
timeoutNum += 1
if timeoutNum > 10 {
healthStatus["status"] = "unusual"
healthStatus["info"] = "context timeout more than ten times"
}
continue
}
logrus.Error(err.Error())
time.Sleep(time.Second * 2)
errorNum += 1
if errorNum > 10 {
healthStatus["status"] = "unusual"
healthStatus["info"] = err.Error()
}
continue
}
logrus.Debugf("Receive a task: %s", data.String())

View File

@ -185,6 +185,7 @@ func (e *exectorManager) buildFromImage(in []byte) {
} else {
i.Logger.Error("从镜像构建应用任务执行失败", map[string]string{"step": "callback", "status": "failure"})
status = "failure"
}
} else {
break

View File

@ -2,7 +2,7 @@ package exector
import (
"github.com/prometheus/client_golang/prometheus"
"github.com/goodrain/rainbond/worker/discover"
"github.com/goodrain/rainbond/builder/discover"
)
// Metric name parts.
@ -57,10 +57,10 @@ func (e *Exporter) scrape(ch chan<- prometheus.Metric) {
healthInfo := discover.HealthCheck()
healthStatus := healthInfo["status"]
var val float64
if healthStatus == "health"{
val = 0
}else {
if healthStatus == "health" {
val = 1
} else {
val = 0
}
ch <- prometheus.MustNewConstMetric(e.healthStatus.Desc(), prometheus.GaugeValue, val)

View File

@ -34,6 +34,7 @@ func Register(container *restful.Container, coreManager core.Manager, readStore
DomainSource{coreManager, readStore, apiStoreManager, 10000}.Register(container)
NodeSource{coreManager, readStore, apiStoreManager, clientSet}.Register(container)
PodSource{apiStoreManager}.Register(container)
HealthStatus{apiStoreManager}.Register(container)
}
//ResponseType 返回内容

View File

@ -0,0 +1,48 @@
// Copyright (C) 2014-2018 Goodrain Co., Ltd.
// RAINBOND, Application Management Platform
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version. For any non-GPL usage of Rainbond,
// one or multiple Commercial Licenses authorized by Goodrain Co., Ltd.
// must be obtained first.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
package controller
import (
apistore "github.com/goodrain/rainbond/entrance/api/store"
"github.com/emicklei/go-restful"
)
// HealthStatus is the REST resource that exposes the entrance health check endpoint.
type HealthStatus struct {
// apiStoreManager is the API store manager handed in by the caller when the resource is constructed.
apiStoreManager *apistore.Manager
}
// Register builds the health check web service and adds it to container.
// The service is rooted at "/health" and routes GET "/" to healthCheck.
func (h HealthStatus) Register(container *restful.Container) {
	ws := new(restful.WebService)
	// The path has no template variables, so no path parameter is declared;
	// the description names the health endpoint rather than the pod resource.
	ws.Path("/health").
		Doc("Get entrance health status").
		Consumes(restful.MIME_XML, restful.MIME_JSON).
		Produces(restful.MIME_JSON, restful.MIME_XML)
	ws.Route(ws.GET("/").To(h.healthCheck))
	container.Add(ws)
}
// healthCheck handles GET requests on the health endpoint. The request is not
// inspected; the reply is produced by NewSuccessResponse with a fixed payload
// whose "status" is "health" and whose "info" is empty.
//
// It uses a value receiver, like Register, because the receiver is never
// mutated and a type should not mix receiver kinds.
func (h HealthStatus) healthCheck(request *restful.Request, response *restful.Response) {
	NewSuccessResponse(map[string]string{"status": "health", "info": ""}, nil, response)
}

View File

@ -49,6 +49,7 @@ type Exporter struct {
scrapeErrors *prometheus.CounterVec
lbPluginUp prometheus.Gauge
coreManager core.Manager
healthStatus prometheus.Gauge
}
//NewExporter creates a new Exporter
@ -77,6 +78,12 @@ func NewExporter(coreManager core.Manager) *Exporter {
Name: "up",
Help: "Whether the default lb plugin is up.",
}),
healthStatus:prometheus.NewGauge(prometheus.GaugeOpts{
Namespace: namespace,
Subsystem: exporter,
Name: "entrance_health_status",
Help: "entrance component health status.",
}),
coreManager: coreManager,
}
}
@ -113,4 +120,5 @@ func (e *Exporter) scrape(ch chan<- prometheus.Metric) {
logrus.Error("core manager scrape for prometheus error.", err.Error())
e.error.Set(1)
}
ch <- prometheus.MustNewConstMetric(e.healthStatus.Desc(), prometheus.GaugeValue, 1)
}

View File

@ -56,7 +56,7 @@ func NewTaskManager(c option.Config, executor executor.Manager, statusManager *s
ctx, cancel := context.WithCancel(context.Background())
handleManager := handle.NewManager(ctx, c, executor, statusManager)
healthStatus["status"] = "health"
healthStatus["info"] = "service health"
healthStatus["info"] = "worker service health"
return &TaskManager{
ctx: ctx,
cancel: cancel,
@ -84,8 +84,6 @@ func (t *TaskManager) Start() error {
func (t *TaskManager) Do() {
logrus.Info("start receive task from mq")
hostname, _ := os.Hostname()
timeoutNum := 0
errorNum := 0
for {
select {
case <-t.ctx.Done():
@ -105,20 +103,10 @@ func (t *TaskManager) Do() {
return
}
if grpc1.ErrorDesc(err) == "context timeout" {
timeoutNum += 1
if timeoutNum > 10 {
healthStatus["status"] = "unusual"
healthStatus["info"] = "context timeout more than ten times"
}
continue
}
logrus.Error("receive task error.", err.Error())
time.Sleep(time.Second * 2)
errorNum += 1
if errorNum > 10 {
healthStatus["status"] = "unusual"
healthStatus["info"] = err.Error()
}
continue
}
logrus.Debugf("receive a task: %v", data)

View File

@ -123,12 +123,12 @@ func (e *Exporter) scrape(ch chan<- prometheus.Metric) {
ch <- prometheus.MustNewConstMetric(scrapeDurationDesc, prometheus.GaugeValue, time.Since(scrapeTime).Seconds(), "collect.fs")
healthInfo := discover.HealthCheck()
healthStatus :=healthInfo["status"]
healthStatus := healthInfo["status"]
var val float64
if healthStatus == "health"{
val = 0
}else {
if healthStatus == "health" {
val = 1
} else {
val = 0
}
ch <- prometheus.MustNewConstMetric(e.healthStatus.Desc(), prometheus.GaugeValue, val)
}
@ -171,8 +171,8 @@ func New(statusManager *status.AppRuntimeSyncClient) *Exporter {
Name: "appfs",
Help: "tenant service fs used.",
}, []string{"tenant_id", "service_id", "volume_type"}),
healthStatus:prometheus.NewGauge(prometheus.GaugeOpts{
Namespace: "worker",
healthStatus: prometheus.NewGauge(prometheus.GaugeOpts{
Namespace: namespace,
Subsystem: "exporter",
Name: "worker_health_status",
Help: "worker component health status.",