mirror of
https://gitee.com/rainbond/Rainbond.git
synced 2024-12-02 19:57:42 +08:00
[REV] Optimize service restart policy when status is unhealthy.
This commit is contained in:
parent
31c25685f3
commit
4a37e55f37
@ -82,6 +82,17 @@ func (m *ControllerSystemd) RestartService(serviceName string) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *ControllerSystemd) StatusService(name string) error {
|
||||
cmd := fmt.Sprintf("systemctl status %s | head -3 | tail -1 | awk '{print $2}'", name)
|
||||
err := exec.Command("/usr/bin/bash", "-c", cmd).Run()
|
||||
if err != nil {
|
||||
logrus.Errorf("Restart service %s: %v", name, err)
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *ControllerSystemd) StartList(list []*service.Service) error {
|
||||
logrus.Info("Starting all services.")
|
||||
|
||||
|
@ -29,6 +29,8 @@ import (
|
||||
"github.com/goodrain/rainbond/node/nodem/service"
|
||||
"io/ioutil"
|
||||
"os/exec"
|
||||
"time"
|
||||
"github.com/goodrain/rainbond/node/masterserver/node"
|
||||
)
|
||||
|
||||
type ManagerService struct {
|
||||
@ -144,26 +146,27 @@ func (m *ManagerService) StartSyncService() {
|
||||
return
|
||||
}
|
||||
|
||||
unhealthyNum := 0
|
||||
maxUnhealthyNum := 2
|
||||
|
||||
go func() {
|
||||
defer w.Close()
|
||||
unhealthyNum := 0
|
||||
maxUnhealthyNum := 2
|
||||
for {
|
||||
select {
|
||||
case event := <-w.Watch():
|
||||
switch event.Status {
|
||||
case service.Stat_healthy:
|
||||
logrus.Debugf("[%s] check service %s.", event.Status, event.Name)
|
||||
unhealthyNum = 0
|
||||
case service.Stat_unhealthy:
|
||||
logrus.Infof("[%s] check service %s %d times.", event.Status, event.Name, unhealthyNum)
|
||||
logrus.Debugf("[%s] check service %s %d times.", event.Status, event.Name, unhealthyNum)
|
||||
if unhealthyNum > maxUnhealthyNum {
|
||||
logrus.Infof("[%s] check service %s %d times and will be restart.", event.Status, event.Name, unhealthyNum)
|
||||
m.ctr.RestartService(event.Name)
|
||||
unhealthyNum = 0
|
||||
m.ctr.RestartService(event.Name)
|
||||
}
|
||||
unhealthyNum++
|
||||
case service.Stat_death:
|
||||
logrus.Infof("[%s] check service %s %d times.", event.Status, event.Name, unhealthyNum)
|
||||
logrus.Debugf("[%s] check service %s %d times.", event.Status, event.Name, unhealthyNum)
|
||||
if unhealthyNum > maxUnhealthyNum {
|
||||
logrus.Infof("[%s] check service %s %d times and will be start.", event.Status, event.Name, unhealthyNum)
|
||||
m.ctr.StartService(event.Name)
|
||||
@ -185,6 +188,22 @@ func (m *ManagerService) StopSyncService() {
|
||||
}
|
||||
}
|
||||
|
||||
func (m *ManagerService) WaitStart(name string, duration time.Duration) bool {
|
||||
max := time.Now().Add(duration)
|
||||
t := time.Tick(time.Second)
|
||||
|
||||
for {
|
||||
<-t
|
||||
status, _ := m.healthyManager.GetCurrentServiceHealthy(name)
|
||||
if status.Status == node.Running {
|
||||
return true
|
||||
}
|
||||
if time.Now().After(max) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
1. reload services config from local file system
|
||||
2. regenerate systemd config for all service
|
||||
|
@ -33,6 +33,7 @@ import (
|
||||
//Manager Manager
|
||||
type Manager interface {
|
||||
GetServiceHealthy(serviceName string) (*service.HealthStatus, bool)
|
||||
GetCurrentServiceHealthy(serviceName string) (*service.HealthStatus, error)
|
||||
WatchServiceHealthy(serviceName string) Watcher
|
||||
CloseWatch(serviceName string, id string) error
|
||||
Start(hostNode *client.HostNode) error
|
||||
|
Loading…
Reference in New Issue
Block a user