[REV] Optimize prometheus exit logic.

This commit is contained in:
Zhang Jiajun 2018-06-05 14:02:01 +08:00
parent cac0f0a8ed
commit ba0649bfc7
2 changed files with 48 additions and 33 deletions

View File

@ -19,11 +19,14 @@
package main
import (
"github.com/Sirupsen/logrus"
"github.com/goodrain/rainbond/cmd/monitor/option"
"github.com/spf13/pflag"
"github.com/goodrain/rainbond/monitor"
"github.com/goodrain/rainbond/monitor/prometheus"
"github.com/goodrain/rainbond/monitor/utils"
"github.com/spf13/pflag"
"os"
"os/signal"
"syscall"
)
func main() {
@ -36,7 +39,9 @@ func main() {
// start the prometheus daemon and keep watching its status; exit the monitor process if it fails to start
p := prometheus.NewManager(c)
p.StartDaemon()
errChan := make(chan error, 1)
defer close(errChan)
p.StartDaemon(errChan)
defer p.StopDaemon()
// register prometheus address to etcd cluster
@ -48,5 +53,18 @@ func main() {
m.Start()
defer m.Stop()
utils.ListenStop()
// Final step: block until either a termination signal arrives or the
// prometheus daemon reports on errChan that it has exited.
//
// The signal package never blocks when delivering to a channel, so the channel
// must be buffered; with an unbuffered channel a signal that arrives before
// the select below is ready to receive would be silently dropped.
term := make(chan os.Signal, 1)
// Unregister the channel instead of closing it: closing a channel that is
// still registered would panic if another signal were delivered afterwards.
defer signal.Stop(term)
signal.Notify(term, os.Interrupt, syscall.SIGTERM)
select {
case <-term:
	logrus.Warn("Received SIGTERM, exiting monitor gracefully...")
case err := <-errChan:
	if err != nil {
		logrus.Errorf("Received a error %s from prometheus, exiting monitor gracefully...", err.Error())
	}
}
logrus.Info("See you next time!")
}

View File

@ -19,29 +19,32 @@
package prometheus
import (
"context"
"fmt"
"github.com/Sirupsen/logrus"
"github.com/goodrain/rainbond/cmd/monitor/option"
"github.com/goodrain/rainbond/discover"
"github.com/prometheus/common/model"
"gopkg.in/yaml.v2"
"io/ioutil"
"net/http"
"sync"
"time"
"os"
"syscall"
"github.com/prometheus/common/model"
"net"
"fmt"
"net/http"
"os"
"sync"
"syscall"
"time"
"errors"
)
// Lifecycle states stored in the manager's Status field; iota numbers them
// 0 through 3 in declaration order.
const (
// STARTING is set when StartDaemon begins launching prometheus.
STARTING = iota
// STARTED is set by StartDaemon once its startup wait has finished.
STARTED
// STOPPING marks a shutdown that is in progress.
STOPPING
// STOPPED is set after the prometheus process has been waited on.
STOPPED
)
type Manager struct {
cancel context.CancelFunc
ctx context.Context
Opt *option.Config
Config *Config
Process *os.Process
@ -62,10 +65,10 @@ func NewManager(config *option.Config) *Manager {
}
m := &Manager{
Opt: config,
Config: &Config{
Opt: config,
Config: &Config{
GlobalConfig: GlobalConfig{
ScrapeInterval: model.Duration(time.Second * 5),
ScrapeInterval: model.Duration(time.Second * 5),
EvaluationInterval: model.Duration(time.Second * 10),
},
},
@ -78,9 +81,8 @@ func NewManager(config *option.Config) *Manager {
return m
}
func (p *Manager) StartDaemon() {
func (p *Manager) StartDaemon(errchan chan error) {
logrus.Info("Starting prometheus.")
p.Status = STARTING
// start prometheus
procAttr := &os.ProcAttr{
@ -108,31 +110,26 @@ func (p *Manager) StartDaemon() {
time.Sleep(time.Second)
}
p.Status = STARTED
p.Status = STARTED
// watch for the prometheus process exiting
go func() {
p.Process.Wait()
logrus.Warn("Exited prometheus unexpected.")
if p.Status != STOPPING {
p.Status = STOPPING
logrus.Info("Send signal to monitor.")
self, err := os.FindProcess(os.Getpid())
if err == nil {
self.Signal(syscall.SIGTERM)
}
_, err := p.Process.Wait()
logrus.Warn("Exited prometheus unexpectedly.")
if err == nil {
err = errors.New("exited prometheus unexpectedly")
}
p.Status = STOPPED
errchan <- err
}()
}
// StopDaemon sends SIGTERM to the prometheus process, blocks until the
// process has exited, and then records the STOPPED status on the manager.
// The results of Signal and Wait are discarded.
func (p *Manager) StopDaemon() {
if p.Status != STOPPING {
p.Status = STOPPING
if p.Status != STOPPED {
logrus.Info("Stopping prometheus daemon ...")
p.Process.Signal(syscall.SIGTERM)
// Wait for the process to exit before marking the daemon as stopped.
p.Process.Wait()
p.Status = STOPPED
logrus.Info("Stopped prometheus daemon.")
}
}
@ -150,14 +147,14 @@ func (p *Manager) RestartDaemon() error {
func (p *Manager) LoadConfig() error {
logrus.Info("Load prometheus config file.")
context, err := ioutil.ReadFile(p.Opt.ConfigFile)
content, err := ioutil.ReadFile(p.Opt.ConfigFile)
if err != nil {
logrus.Error("Failed to read prometheus config file: ", err)
logrus.Info("Init config file by default values.")
return nil
}
if err := yaml.Unmarshal(context, p.Config); err != nil {
if err := yaml.Unmarshal(content, p.Config); err != nil {
logrus.Error("Unmarshal prometheus config string to object error.", err.Error())
return err
}