From 4470bf64ee7fd77bd1502f1576aa4cb4ec90446d Mon Sep 17 00:00:00 2001 From: Zhang Jiajun Date: Sat, 2 Jun 2018 10:45:11 +0800 Subject: [PATCH] [ADD] New feature monitoring component. --- cmd/monitor/main.go | 16 ++- cmd/monitor/option/option.go | 54 ++++++++-- hack/contrib/docker/monitor/Dockerfile | 6 +- monitor/callback/app.go | 21 +++- monitor/callback/appstatus.go | 15 ++- monitor/callback/entrance.go | 7 +- monitor/callback/etcd.go | 7 +- monitor/callback/eventlog.go | 7 +- monitor/callback/node.go | 10 +- monitor/callback/promethes.go | 15 ++- monitor/monitor.go | 76 ++++---------- monitor/prometheus/config.go | 6 +- monitor/prometheus/manager.go | 139 ++++++++++++++----------- release.sh | 3 +- 14 files changed, 220 insertions(+), 162 deletions(-) diff --git a/cmd/monitor/main.go b/cmd/monitor/main.go index 2f343828a..775bc6f96 100644 --- a/cmd/monitor/main.go +++ b/cmd/monitor/main.go @@ -22,6 +22,7 @@ import ( "github.com/goodrain/rainbond/cmd/monitor/option" "github.com/spf13/pflag" "github.com/goodrain/rainbond/monitor" + "github.com/goodrain/rainbond/monitor/prometheus" ) func main() { @@ -31,6 +32,19 @@ func main() { c.CompleteConfig() - m := monitor.NewMonitor(c) + // start prometheus daemon and watch its status at all times, exit monitor process if start failed + p := prometheus.NewManager(c) + p.StartDaemon() + defer p.StopDaemon() + + // register prometheus address to etcd cluster + p.Registry.Start() + defer p.Registry.Stop() + + // start watching components from etcd, and apply modifications to prometheus config + m := monitor.NewMonitor(c, p) m.Start() + defer m.Stop() + + m.ListenStop() } diff --git a/cmd/monitor/option/option.go b/cmd/monitor/option/option.go index 2b81e4223..ce45f784f 100644 --- a/cmd/monitor/option/option.go +++ b/cmd/monitor/option/option.go @@ -23,6 +23,8 @@ import ( "github.com/Sirupsen/logrus" "fmt" "os" + "strings" + "strconv" ) type Config struct { @@ -31,28 +33,63 @@ type Config struct { ConfigFile string BindIp string 
Port int + Options string + Args []string } func NewConfig() *Config { - h, _ := os.Hostname() - return &Config{ + host, _ := os.Hostname() + + config := &Config{ EtcdEndpoints: []string{"http://127.0.0.1:2379"}, - LogLevel: "info", ConfigFile: "/etc/prometheus/prometheus.yml", - BindIp: h, + BindIp: host, Port: 9999, + LogLevel: "info", } + + defaultOptions := "--web.listen-address=%s:%d --config.file=%s --storage.tsdb.path=/prometheusdata --storage.tsdb.retention=7d --log.level=%s" + defaultOptions = fmt.Sprintf(defaultOptions, config.BindIp, config.Port, config.ConfigFile, config.LogLevel) + + config.Options = defaultOptions + return config } func (c *Config) AddFlag(cmd *pflag.FlagSet) { cmd.StringArrayVar(&c.EtcdEndpoints, "etcd-endpoints", c.EtcdEndpoints, "etcd endpoints list") - cmd.StringVar(&c.LogLevel, "log-level", c.LogLevel, "log level") - cmd.StringVar(&c.ConfigFile, "config-file", c.ConfigFile, "prometheus config file path") - cmd.StringVar(&c.BindIp, "bind-ip", c.BindIp, "prometheus bind ip") - cmd.IntVar(&c.Port, "port", c.Port, "prometheus listen port") + cmd.StringVar(&c.Options, "prometheus-options", c.Options, "specified options for prometheus") } func (c *Config) CompleteConfig() { + // parse values from prometheus options to config + args := strings.Split(c.Options, " ") + for i := 0; i < len(args); i++ { + kv := strings.Split(args[i], "=") + if len(kv) < 2 { + kv = append(kv, args[i]) + i++ + } + + switch kv[0] { + case "--web.listen-address": + ipPort := strings.Split(kv[1], ":") + if ipPort[0] != "" { + c.BindIp = ipPort[0] + } + port, err := strconv.Atoi(ipPort[1]) + if err == nil && port != 0 { + c.Port = port + } + case "--config.file": + c.ConfigFile = kv[1] + case "--log.level": + c.LogLevel = kv[1] + } + } + + c.Args = append(c.Args, os.Args[0]) + c.Args = append(c.Args, args...) 
+ level, err := logrus.ParseLevel(c.LogLevel) if err != nil { fmt.Println("ERROR set log level:", err) @@ -60,4 +97,5 @@ func (c *Config) CompleteConfig() { } logrus.SetLevel(level) + logrus.Info("Start with options: ", c) } diff --git a/hack/contrib/docker/monitor/Dockerfile b/hack/contrib/docker/monitor/Dockerfile index 58652ba33..b10b82f19 100644 --- a/hack/contrib/docker/monitor/Dockerfile +++ b/hack/contrib/docker/monitor/Dockerfile @@ -1,10 +1,10 @@ FROM prom/prometheus:v2.2.1 +USER root VOLUME ["/prometheusdata"] ENV RELEASE_DESC=__RELEASE_DESC__ -ADD rainbond-monitor /run/monitor +COPY rainbond-monitor /usr/bin/monitor -ENTRYPOINT [""] -CMD [ "/run/monitor" ] \ No newline at end of file +ENTRYPOINT ["/usr/bin/monitor"] \ No newline at end of file diff --git a/monitor/callback/app.go b/monitor/callback/app.go index cf589d5cf..5fd9b80b5 100644 --- a/monitor/callback/app.go +++ b/monitor/callback/app.go @@ -27,6 +27,7 @@ import ( "github.com/prometheus/common/model" "github.com/tidwall/gjson" "time" + "github.com/goodrain/rainbond/monitor/utils" ) // App 指app运行时信息,来源于所有子节点上的node @@ -40,8 +41,17 @@ type App struct { } func (e *App) UpdateEndpoints(endpoints ...*config.Endpoint) { + newArr := utils.TrimAndSort(endpoints) - return + if utils.ArrCompare(e.sortedEndpoints, newArr) { + logrus.Debugf("The endpoints is not modify: %s", e.Name()) + return + } + + e.sortedEndpoints = newArr + + scrape := e.toScrape() + e.Prometheus.UpdateScrape(scrape) } func (e *App) Error(err error) { @@ -53,9 +63,9 @@ func (e *App) Name() string { } func (e *App) toScrape() *prometheus.ScrapeConfig { - ts := make([]model.LabelSet, 0, len(e.sortedEndpoints)) + ts := make([]string, 0, len(e.sortedEndpoints)) for _, end := range e.sortedEndpoints { - ts = append(ts, model.LabelSet{model.AddressLabel: model.LabelValue(end)}) + ts = append(ts, end) } return &prometheus.ScrapeConfig{ @@ -69,7 +79,7 @@ func (e *App) toScrape() *prometheus.ScrapeConfig { { Targets: ts, Labels: 
map[model.LabelName]model.LabelValue{ - "component": "acp_entrance", + "component": model.LabelValue(e.Name()), }, }, }, @@ -104,7 +114,8 @@ func (e *App) Modify(event *watch.Event) { func (e *App) Delete(event *watch.Event) { for i, end := range e.endpoints { - if end.URL == event.GetValueString() { + url := gjson.Get(event.GetValueString(), "external_ip").String() + ":6100" + if end.URL == url { e.endpoints = append(e.endpoints[:i], e.endpoints[i+1:]...) e.UpdateEndpoints(e.endpoints...) break diff --git a/monitor/callback/appstatus.go b/monitor/callback/appstatus.go index d5e329b2e..764772ed9 100644 --- a/monitor/callback/appstatus.go +++ b/monitor/callback/appstatus.go @@ -27,6 +27,7 @@ import ( "github.com/prometheus/common/model" "time" "strings" + "github.com/tidwall/gjson" ) // AppStatus 指app性能数据,被选举为leader的worker,也就是app_sync_runtime_server所在的节点 @@ -38,6 +39,11 @@ type AppStatus struct { } func (e *AppStatus) UpdateEndpoints(endpoints ...*config.Endpoint) { + // 用v3 API注册,返回json格式,所以要提前处理一下 + for i, end := range endpoints { + endpoints[i].URL = gjson.Get(end.URL, "Addr").String() + } + newArr := utils.TrimAndSort(endpoints) // change port @@ -46,6 +52,7 @@ func (e *AppStatus) UpdateEndpoints(endpoints ...*config.Endpoint) { } if utils.ArrCompare(e.sortedEndpoints, newArr) { + logrus.Debugf("The endpoints is not modify: %s", e.Name()) return } @@ -64,14 +71,14 @@ func (e *AppStatus) Name() string { } func (e *AppStatus) toScrape() *prometheus.ScrapeConfig { - ts := make([]model.LabelSet, 0, len(e.sortedEndpoints)) + ts := make([]string, 0, len(e.sortedEndpoints)) for _, end := range e.sortedEndpoints { - ts = append(ts, model.LabelSet{model.AddressLabel: model.LabelValue(end)}) + ts = append(ts, end) } return &prometheus.ScrapeConfig{ JobName: e.Name(), - ScrapeInterval: model.Duration(5 * time.Minute), + ScrapeInterval: model.Duration(time.Minute), ScrapeTimeout: model.Duration(30 * time.Second), MetricsPath: "/metrics", HonorLabels: true, @@ -80,7 +87,7 @@ 
func (e *AppStatus) toScrape() *prometheus.ScrapeConfig { { Targets: ts, Labels: map[model.LabelName]model.LabelValue{ - "component": "acp_entrance", + "component": model.LabelValue(e.Name()), }, }, }, diff --git a/monitor/callback/entrance.go b/monitor/callback/entrance.go index d52954693..592c04309 100644 --- a/monitor/callback/entrance.go +++ b/monitor/callback/entrance.go @@ -38,6 +38,7 @@ func (e *Entrance) UpdateEndpoints(endpoints ...*config.Endpoint) { newArr := utils.TrimAndSort(endpoints) if utils.ArrCompare(e.sortedEndpoints, newArr) { + logrus.Debugf("The endpoints is not modify: %s", e.Name()) return } @@ -56,9 +57,9 @@ func (e *Entrance) Name() string { } func (e *Entrance) toScrape() *prometheus.ScrapeConfig { - ts := make([]model.LabelSet, 0, len(e.sortedEndpoints)) + ts := make([]string, 0, len(e.sortedEndpoints)) for _, end := range e.sortedEndpoints { - ts = append(ts, model.LabelSet{model.AddressLabel: model.LabelValue(end)}) + ts = append(ts, end) } return &prometheus.ScrapeConfig{ @@ -72,7 +73,7 @@ func (e *Entrance) toScrape() *prometheus.ScrapeConfig { { Targets: ts, Labels: map[model.LabelName]model.LabelValue{ - "component": "acp_entrance", + "component": model.LabelValue(e.Name()), }, }, }, diff --git a/monitor/callback/etcd.go b/monitor/callback/etcd.go index 4e08b07ba..81bcc6e9a 100644 --- a/monitor/callback/etcd.go +++ b/monitor/callback/etcd.go @@ -38,6 +38,7 @@ func (e *Etcd) UpdateEndpoints(endpoints ...*config.Endpoint) { newArr := utils.TrimAndSort(endpoints) if utils.ArrCompare(e.sortedEndpoints, newArr) { + logrus.Debugf("The endpoints is not modify: %s", e.Name()) return } @@ -56,9 +57,9 @@ func (e *Etcd) Name() string { } func (e *Etcd) toScrape() *prometheus.ScrapeConfig { - ts := make([]model.LabelSet, 0, len(e.sortedEndpoints)) + ts := make([]string, 0, len(e.sortedEndpoints)) for _, end := range e.sortedEndpoints { - ts = append(ts, model.LabelSet{model.AddressLabel: model.LabelValue(end)}) + ts = append(ts, end) } return 
&prometheus.ScrapeConfig{ @@ -71,7 +72,7 @@ func (e *Etcd) toScrape() *prometheus.ScrapeConfig { { Targets: ts, Labels: map[model.LabelName]model.LabelValue{ - "component": "acp_entrance", + "component": model.LabelValue(e.Name()), }, }, }, diff --git a/monitor/callback/eventlog.go b/monitor/callback/eventlog.go index 1d25f4353..eb90bf154 100644 --- a/monitor/callback/eventlog.go +++ b/monitor/callback/eventlog.go @@ -38,6 +38,7 @@ func (e *EventLog) UpdateEndpoints(endpoints ...*config.Endpoint) { newArr := utils.TrimAndSort(endpoints) if utils.ArrCompare(e.sortedEndpoints, newArr) { + logrus.Debugf("The endpoints is not modify: %s", e.Name()) return } @@ -56,9 +57,9 @@ func (e *EventLog) Name() string { } func (e *EventLog) toScrape() *prometheus.ScrapeConfig { - ts := make([]model.LabelSet, 0, len(e.sortedEndpoints)) + ts := make([]string, 0, len(e.sortedEndpoints)) for _, end := range e.sortedEndpoints { - ts = append(ts, model.LabelSet{model.AddressLabel: model.LabelValue(end)}) + ts = append(ts, end) } return &prometheus.ScrapeConfig{ @@ -72,7 +73,7 @@ func (e *EventLog) toScrape() *prometheus.ScrapeConfig { { Targets: ts, Labels: map[model.LabelName]model.LabelValue{ - "component": "acp_event_log", + "component": model.LabelValue(e.Name()), }, }, }, diff --git a/monitor/callback/node.go b/monitor/callback/node.go index 2851caeac..96619f882 100644 --- a/monitor/callback/node.go +++ b/monitor/callback/node.go @@ -42,6 +42,7 @@ func (e *Node) UpdateEndpoints(endpoints ...*config.Endpoint) { newArr := utils.TrimAndSort(endpoints) if utils.ArrCompare(e.sortedEndpoints, newArr) { + logrus.Debugf("The endpoints is not modify: %s", e.Name()) return } @@ -60,9 +61,9 @@ func (e *Node) Name() string { } func (e *Node) toScrape() *prometheus.ScrapeConfig { - ts := make([]model.LabelSet, 0, len(e.sortedEndpoints)) + ts := make([]string, 0, len(e.sortedEndpoints)) for _, end := range e.sortedEndpoints { - ts = append(ts, model.LabelSet{model.AddressLabel: 
model.LabelValue(end)}) + ts = append(ts, end) } return &prometheus.ScrapeConfig{ @@ -76,7 +77,7 @@ func (e *Node) toScrape() *prometheus.ScrapeConfig { { Targets: ts, Labels: map[model.LabelName]model.LabelValue{ - "component": "acp_entrance", + "component": model.LabelValue(e.Name()), }, }, }, @@ -111,7 +112,8 @@ func (e *Node) Modify(event *watch.Event) { func (e *Node) Delete(event *watch.Event) { for i, end := range e.endpoints { - if end.URL == event.GetValueString() { + url := gjson.Get(event.GetValueString(), "external_ip").String() + ":6100" + if end.URL == url { e.endpoints = append(e.endpoints[:i], e.endpoints[i+1:]...) e.UpdateEndpoints(e.endpoints...) break diff --git a/monitor/callback/promethes.go b/monitor/callback/promethes.go index b2d3c935a..b727aafe9 100644 --- a/monitor/callback/promethes.go +++ b/monitor/callback/promethes.go @@ -26,6 +26,7 @@ import ( "github.com/goodrain/rainbond/monitor/utils" "github.com/prometheus/common/model" "time" + "github.com/tidwall/gjson" ) // Prometheus 指prometheus的运行指标,数据来源于prometheus自身API @@ -36,9 +37,15 @@ type Prometheus struct { } func (e *Prometheus) UpdateEndpoints(endpoints ...*config.Endpoint) { + // 用v3 API注册,返回json格式,所以要提前处理一下 + for i, end := range endpoints { + endpoints[i].URL = gjson.Get(end.URL, "Addr").String() + } + newArr := utils.TrimAndSort(endpoints) if utils.ArrCompare(e.sortedEndpoints, newArr) { + logrus.Debugf("The endpoints is not modify: %s", e.Name()) return } @@ -57,14 +64,14 @@ func (e *Prometheus) Name() string { } func (e *Prometheus) toScrape() *prometheus.ScrapeConfig { - ts := make([]model.LabelSet, 0, len(e.sortedEndpoints)) + ts := make([]string, 0, len(e.sortedEndpoints)) for _, end := range e.sortedEndpoints { - ts = append(ts, model.LabelSet{model.AddressLabel: model.LabelValue(end)}) + ts = append(ts, end) } return &prometheus.ScrapeConfig{ JobName: e.Name(), - ScrapeInterval: model.Duration(5 * time.Minute), + ScrapeInterval: model.Duration(time.Minute), ScrapeTimeout: 
model.Duration(30 * time.Second), MetricsPath: "/metrics", HonorLabels: true, @@ -73,7 +80,7 @@ func (e *Prometheus) toScrape() *prometheus.ScrapeConfig { { Targets: ts, Labels: map[model.LabelName]model.LabelValue{ - "component": "acp_entrance", + "component": model.LabelValue(e.Name()), }, }, }, diff --git a/monitor/monitor.go b/monitor/monitor.go index 4e0e0244c..15f466437 100644 --- a/monitor/monitor.go +++ b/monitor/monitor.go @@ -32,8 +32,8 @@ import ( "os" "syscall" "os/signal" - "github.com/goodrain/rainbond/monitor/prometheus" "github.com/tidwall/gjson" + "github.com/goodrain/rainbond/monitor/prometheus" ) type Monitor struct { @@ -42,50 +42,25 @@ type Monitor struct { cancel context.CancelFunc client *v3.Client timeout time.Duration - stopperList []chan bool - + manager *prometheus.Manager discover1 discover1.Discover discover3 discover3.Discover } func (d *Monitor) Start() { - // create prometheus manager - p := prometheus.NewManager(d.config) - // start prometheus daemon and watching tis status in all time, exit monitor process if start failed - p.StartDaemon(d.GetStopper()) - - d.discover1.AddProject("event_log_event_grpc", &callback.EventLog{Prometheus: p}) - d.discover1.AddProject("acp_entrance", &callback.Entrance{Prometheus: p}) - d.discover3.AddProject("app_sync_runtime_server", &callback.AppStatus{Prometheus: p}) + d.discover1.AddProject("event_log_event_http", &callback.EventLog{Prometheus: d.manager}) + d.discover1.AddProject("acp_entrance", &callback.Entrance{Prometheus: d.manager}) + d.discover3.AddProject("app_sync_runtime_server", &callback.AppStatus{Prometheus: d.manager}) + d.discover3.AddProject("prometheus", &callback.Prometheus{Prometheus: d.manager}) // node and app runtime metrics needs to be monitored separately - go d.discoverNodes(&callback.Node{Prometheus: p}, &callback.App{Prometheus: p}, d.GetStopper()) + go d.discoverNodes(&callback.Node{Prometheus: d.manager}, &callback.App{Prometheus: d.manager}, d.ctx.Done()) - 
d.listenStop() + // monitor etcd members + go d.discoverEtcd(&callback.Etcd{Prometheus: d.manager}, d.ctx.Done()) } -func (d *Monitor) discoverNodes(node *callback.Node, app *callback.App, done chan bool) { - // get all exist nodes by etcd - resp, err := d.client.Get(d.ctx, "/rainbond/nodes/", v3.WithPrefix()) - if err != nil { - logrus.Error("failed to get all nodes: ", err) - return - } - - for _, kv := range resp.Kvs { - url := gjson.GetBytes(kv.Value, "external_ip").String() + ":6100" - end := &config.Endpoint{ - URL: url, - } - - node.AddEndpoint(end) - - isSlave := gjson.GetBytes(kv.Value, "labels.rainbond_node_rule_compute").String() - if isSlave == "true" { - app.AddEndpoint(end) - } - } - +func (d *Monitor) discoverNodes(node *callback.Node, app *callback.App, done <-chan struct{}) { // start listen node modified watcher := watch.New(d.client, "") w, err := watcher.WatchList(d.ctx, "/rainbond/nodes", "") @@ -130,7 +105,6 @@ func (d *Monitor) discoverNodes(node *callback.Node, app *callback.App, done cha } case <-done: logrus.Info("stop discover nodes because received stop signal.") - close(done) return } @@ -138,13 +112,12 @@ func (d *Monitor) discoverNodes(node *callback.Node, app *callback.App, done cha } -func (d *Monitor) discoverEtcd(e *callback.Etcd, done chan bool) { - t := time.Tick(time.Second * 5) +func (d *Monitor) discoverEtcd(e *callback.Etcd, done <-chan struct{}) { + t := time.Tick(time.Minute) for { select { case <-done: logrus.Info("stop discover etcd because received stop signal.") - close(done) return case <-t: resp, err := d.client.MemberList(d.ctx) @@ -168,39 +141,25 @@ func (d *Monitor) discoverEtcd(e *callback.Etcd, done chan bool) { } func (d *Monitor) Stop() { - logrus.Info("Stop all child process for monitor.") - for _, ch := range d.stopperList { - ch <- true - } - + logrus.Info("Stopping all child process for monitor") + d.cancel() d.discover1.Stop() d.discover3.Stop() d.client.Close() - d.cancel() - - time.Sleep(time.Second) } 
-func (d *Monitor) GetStopper() chan bool { - ch := make(chan bool, 1) - d.stopperList = append(d.stopperList, ch) - - return ch -} - -func (d *Monitor) listenStop() { +func (d *Monitor) ListenStop() { sigs := make(chan os.Signal, 1) signal.Notify(sigs, syscall.SIGKILL, syscall.SIGINT, syscall.SIGTERM) sig := <- sigs signal.Ignore(syscall.SIGKILL, syscall.SIGINT, syscall.SIGTERM) + logrus.Warn("monitor manager received signal: ", sig.String()) close(sigs) - logrus.Warn("monitor manager received signal: ", sig) - d.Stop() } -func NewMonitor(opt *option.Config) *Monitor { +func NewMonitor(opt *option.Config, p *prometheus.Manager) *Monitor { ctx, cancel := context.WithCancel(context.Background()) defaultTimeout := time.Second * 3 @@ -230,6 +189,7 @@ func NewMonitor(opt *option.Config) *Monitor { config: opt, ctx: ctx, cancel: cancel, + manager: p, client: cli, discover1: dc1, discover3: dc3, diff --git a/monitor/prometheus/config.go b/monitor/prometheus/config.go index d8f206e5e..78e288549 100644 --- a/monitor/prometheus/config.go +++ b/monitor/prometheus/config.go @@ -186,12 +186,12 @@ type ServiceDiscoveryConfig struct { type Group struct { // Targets is a list of targets identified by a label set. Each target is // uniquely identifiable in the group by its address label. - Targets []model.LabelSet + Targets []string `yaml:"targets,flow"` // Labels is a set of labels that is common across all targets in the group. - Labels model.LabelSet + Labels model.LabelSet `yaml:"labels,omitempty"` // Source is an identifier that describes a group of targets. - Source string + Source string `yaml:"source,omitempty"` } // Regexp encapsulates a regexp.Regexp and makes it YAML marshallable. 
diff --git a/monitor/prometheus/manager.go b/monitor/prometheus/manager.go index adaaaf8c8..aa0c0349c 100644 --- a/monitor/prometheus/manager.go +++ b/monitor/prometheus/manager.go @@ -19,23 +19,25 @@ package prometheus import ( + "fmt" "github.com/Sirupsen/logrus" "github.com/goodrain/rainbond/cmd/monitor/option" + discover3 "github.com/goodrain/rainbond/discover.v2" "gopkg.in/yaml.v2" "io/ioutil" "net/http" - "os/exec" "sync" "time" - discover3 "github.com/goodrain/rainbond/discover.v2" - "fmt" + "os" + "syscall" ) type Manager struct { ApiUrl string Opt *option.Config Config *Config - Reg *discover3.KeepAlive + Process *os.Process + Registry *discover3.KeepAlive httpClient *http.Client l *sync.Mutex } @@ -50,17 +52,75 @@ func NewManager(config *option.Config) *Manager { panic(err) } - return &Manager{ - ApiUrl: fmt.Sprintf("http://127.0.0.1:%s", config.Port), + m := &Manager{ + ApiUrl: fmt.Sprintf("http://127.0.0.1:%d", config.Port), Opt: config, Config: &Config{}, - Reg: reg, + Registry: reg, httpClient: client, l: &sync.Mutex{}, } + m.LoadConfig() + + return m +} + +func (p *Manager) StartDaemon() { + logrus.Info("Start daemon for prometheus.") + + procAttr := &os.ProcAttr{ + Files: []*os.File{os.Stdin, os.Stdout, os.Stderr}, + } + process, err := os.StartProcess("/bin/prometheus", p.Opt.Args, procAttr) + if err != nil { + if err != nil { + logrus.Error("Can not start prometheus daemon: ", err) + os.Exit(11) + } + } + p.Process = process + + // waiting started + for i := 0; i < 15; i++ { + time.Sleep(time.Second) + if _, err = os.FindProcess(process.Pid); err == nil { + logrus.Info("The prometheus daemon is started.") + return + } + } + + logrus.Error("Timeout start prometheus daemon: ", err) + os.Exit(13) +} + +func (p *Manager) StopDaemon() { + logrus.Info("Stopping prometheus daemon ...") + //exec.Command("sh", "-c", "kill `pgrep prometheus` ; while pgrep prometheus; do sleep 1; done").Run() + p.Process.Kill() + p.Process.Wait() + logrus.Info("Stopped 
prometheus daemon.") +} + +func (p *Manager) RestartDaemon() error { + logrus.Debug("Restart daemon for prometheus.") + //request, err := http.NewRequest("POST", p.ApiUrl+"/-/reload", nil) + //if err != nil { + // logrus.Error("Create request to load config error: ", err) + // return err + //} + // + //_, err = p.httpClient.Do(request) + + if err := p.Process.Signal(syscall.SIGHUP); err != nil { + logrus.Error("Failed to restart daemon for prometheus: ", err) + return err + } + + return nil } func (p *Manager) LoadConfig() error { + logrus.Info("Load prometheus config file.") context, err := ioutil.ReadFile(p.Opt.ConfigFile) if err != nil { logrus.Error("Failed to read prometheus config file: ", err) @@ -71,11 +131,13 @@ func (p *Manager) LoadConfig() error { logrus.Error("Unmarshal prometheus config string to object error.", err.Error()) return err } + logrus.Debugf("Loaded config file to memory: %+v", p.Config) //TODO return nil } func (p *Manager) SaveConfig() error { + logrus.Debug("Save prometheus config file.") data, err := yaml.Marshal(p.Config) if err != nil { logrus.Error("Marshal prometheus config to yaml error.", err.Error()) @@ -91,72 +153,25 @@ func (p *Manager) SaveConfig() error { return nil } -func (p *Manager) StartDaemon(done chan bool) { - cmd := "which prometheus && " + - "prometheus " + - "--web.listen-address=:%s " + - "--storage.tsdb.path=/prometheusdata " + - "--storage.tsdb.retention=7d " + - "--config.file=%s &" - - cmd = fmt.Sprintf(cmd, p.Opt.Port, p.Opt.ConfigFile) - - err := exec.Command("sh", "-c", cmd).Run() - if err != nil { - logrus.Error("Can not start prometheus daemon: ", err) - panic(err) - } - - p.Reg.Start() - defer p.Reg.Stop() - - t := time.Tick(time.Second * 5) - for { - select { - case <-done: - exec.Command("sh", "-c", "kill `pgrep prometheus`").Run() - return - case <-t: - err := exec.Command("sh", "-c", "pgrep prometheus").Run() - if err != nil { - logrus.Error("the prometheus process is exited, ready to restart it.") 
- err := exec.Command("sh", "-c", cmd).Run() - if err == nil { - logrus.Error("Failed to restart the prometheus daemon: ", err) - } - } - } - } - -} - -func (p *Manager) RestartDaemon() error { - request, err := http.NewRequest("POST", p.ApiUrl+"/-/reload", nil) - if err != nil { - logrus.Error("Create request to load config error: ", err) - return err - } - - _, err = p.httpClient.Do(request) - if err != nil { - logrus.Error("load config error: ", err) - return err - } - - return nil -} - func (p *Manager) UpdateScrape(scrape *ScrapeConfig) { + logrus.Debugf("update scrape: %+v", scrape) + p.l.Lock() defer p.l.Unlock() + exist := false for i, s := range p.Config.ScrapeConfigs { if s.JobName == scrape.JobName { p.Config.ScrapeConfigs[i] = scrape + exist = true break } } + if !exist { + p.Config.ScrapeConfigs = append(p.Config.ScrapeConfigs, scrape) + } + p.SaveConfig() p.RestartDaemon() } diff --git a/release.sh b/release.sh index c2bd1b4f4..736c5c55d 100755 --- a/release.sh +++ b/release.sh @@ -57,7 +57,8 @@ function build::image() { elif [ "$1" = "chaos" ];then docker run --rm -v `pwd`:${WORK_DIR} -w ${WORK_DIR} -it golang:1.8.3 go build -ldflags '-w -s' -o ${DOCKER_PATH}/${BASE_NAME}-$1 ./cmd/builder elif [ "$1" = "monitor" ];then - GOOS=linux go build -ldflags '-w -s' -o ${DOCKER_PATH}/${BASE_NAME}-$1 ./cmd/builder + docker run --rm -v `pwd`:${WORK_DIR} -w ${WORK_DIR} -it golang:1.8.3 go build -ldflags "-w -s -extldflags '-static'" -tags 'netgo static_build' -o ${DOCKER_PATH}/${BASE_NAME}-$1 ./cmd/$1 + #go build -ldflags "-w -s -extldflags '-static'" -tags 'netgo static_build' -o ${DOCKER_PATH}/${BASE_NAME}-$1 ./cmd/monitor else docker run --rm -v `pwd`:${WORK_DIR} -w ${WORK_DIR} -it golang:1.8.3 go build -ldflags '-w -s' -o ${DOCKER_PATH}/${BASE_NAME}-$1 ./cmd/$1 fi