mirror of
https://gitee.com/rainbond/Rainbond.git
synced 2024-11-30 02:38:17 +08:00
[ADD] New feature monitoring component.
This commit is contained in:
parent
350b91d871
commit
4470bf64ee
@ -22,6 +22,7 @@ import (
|
||||
"github.com/goodrain/rainbond/cmd/monitor/option"
|
||||
"github.com/spf13/pflag"
|
||||
"github.com/goodrain/rainbond/monitor"
|
||||
"github.com/goodrain/rainbond/monitor/prometheus"
|
||||
)
|
||||
|
||||
func main() {
|
||||
@ -31,6 +32,19 @@ func main() {
|
||||
|
||||
c.CompleteConfig()
|
||||
|
||||
m := monitor.NewMonitor(c)
|
||||
// start the prometheus daemon and keep watching its status; exit the monitor process if the start fails
|
||||
p := prometheus.NewManager(c)
|
||||
p.StartDaemon()
|
||||
defer p.StopDaemon()
|
||||
|
||||
// register prometheus address to etcd cluster
|
||||
p.Registry.Start()
|
||||
defer p.Registry.Stop()
|
||||
|
||||
// start watching components from etcd and apply any changes to the prometheus config
|
||||
m := monitor.NewMonitor(c, p)
|
||||
m.Start()
|
||||
defer m.Stop()
|
||||
|
||||
m.ListenStop()
|
||||
}
|
||||
|
@ -23,6 +23,8 @@ import (
|
||||
"github.com/Sirupsen/logrus"
|
||||
"fmt"
|
||||
"os"
|
||||
"strings"
|
||||
"strconv"
|
||||
)
|
||||
|
||||
type Config struct {
|
||||
@ -31,28 +33,63 @@ type Config struct {
|
||||
ConfigFile string
|
||||
BindIp string
|
||||
Port int
|
||||
Options string
|
||||
Args []string
|
||||
}
|
||||
|
||||
func NewConfig() *Config {
|
||||
h, _ := os.Hostname()
|
||||
return &Config{
|
||||
host, _ := os.Hostname()
|
||||
|
||||
config := &Config{
|
||||
EtcdEndpoints: []string{"http://127.0.0.1:2379"},
|
||||
LogLevel: "info",
|
||||
ConfigFile: "/etc/prometheus/prometheus.yml",
|
||||
BindIp: h,
|
||||
BindIp: host,
|
||||
Port: 9999,
|
||||
LogLevel: "info",
|
||||
}
|
||||
|
||||
defaultOptions := "--web.listen-address=%s:%d --config.file=%s --storage.tsdb.path=/prometheusdata --storage.tsdb.retention=7d --log.level=%s"
|
||||
defaultOptions = fmt.Sprintf(defaultOptions, config.BindIp, config.Port, config.ConfigFile, config.LogLevel)
|
||||
|
||||
config.Options = defaultOptions
|
||||
return config
|
||||
}
|
||||
|
||||
func (c *Config) AddFlag(cmd *pflag.FlagSet) {
|
||||
cmd.StringArrayVar(&c.EtcdEndpoints, "etcd-endpoints", c.EtcdEndpoints, "etcd endpoints list")
|
||||
cmd.StringVar(&c.LogLevel, "log-level", c.LogLevel, "log level")
|
||||
cmd.StringVar(&c.ConfigFile, "config-file", c.ConfigFile, "prometheus config file path")
|
||||
cmd.StringVar(&c.BindIp, "bind-ip", c.BindIp, "prometheus bind ip")
|
||||
cmd.IntVar(&c.Port, "port", c.Port, "prometheus listen port")
|
||||
cmd.StringVar(&c.Options, "prometheus-options", c.Options, "specified options for prometheus")
|
||||
}
|
||||
|
||||
func (c *Config) CompleteConfig() {
|
||||
// parse values from prometheus options to config
|
||||
args := strings.Split(c.Options, " ")
|
||||
for i := 0; i < len(args); i++ {
|
||||
kv := strings.Split(args[i], "=")
|
||||
if len(kv) < 2 {
|
||||
kv = append(kv, args[i])
|
||||
i++
|
||||
}
|
||||
|
||||
switch kv[0] {
|
||||
case "--web.listen-address":
|
||||
ipPort := strings.Split(kv[1], ":")
|
||||
if ipPort[0] != "" {
|
||||
c.BindIp = ipPort[0]
|
||||
}
|
||||
port, err := strconv.Atoi(ipPort[1])
|
||||
if err == nil && port != 0 {
|
||||
c.Port = port
|
||||
}
|
||||
case "--config.file":
|
||||
c.ConfigFile = kv[1]
|
||||
case "--log.level":
|
||||
c.LogLevel = kv[1]
|
||||
}
|
||||
}
|
||||
|
||||
c.Args = append(c.Args, os.Args[0])
|
||||
c.Args = append(c.Args, args...)
|
||||
|
||||
level, err := logrus.ParseLevel(c.LogLevel)
|
||||
if err != nil {
|
||||
fmt.Println("ERROR set log level:", err)
|
||||
@ -60,4 +97,5 @@ func (c *Config) CompleteConfig() {
|
||||
}
|
||||
logrus.SetLevel(level)
|
||||
|
||||
logrus.Info("Start with options: ", c)
|
||||
}
|
||||
|
@ -1,10 +1,10 @@
|
||||
FROM prom/prometheus:v2.2.1
|
||||
|
||||
USER root
|
||||
VOLUME ["/prometheusdata"]
|
||||
|
||||
ENV RELEASE_DESC=__RELEASE_DESC__
|
||||
|
||||
ADD rainbond-monitor /run/monitor
|
||||
COPY rainbond-monitor /usr/bin/monitor
|
||||
|
||||
ENTRYPOINT [""]
|
||||
CMD [ "/run/monitor" ]
|
||||
ENTRYPOINT ["/usr/bin/monitor"]
|
@ -27,6 +27,7 @@ import (
|
||||
"github.com/prometheus/common/model"
|
||||
"github.com/tidwall/gjson"
|
||||
"time"
|
||||
"github.com/goodrain/rainbond/monitor/utils"
|
||||
)
|
||||
|
||||
// App holds app runtime information, which is collected from the node service on every child node.
|
||||
@ -40,8 +41,17 @@ type App struct {
|
||||
}
|
||||
|
||||
func (e *App) UpdateEndpoints(endpoints ...*config.Endpoint) {
|
||||
newArr := utils.TrimAndSort(endpoints)
|
||||
|
||||
return
|
||||
if utils.ArrCompare(e.sortedEndpoints, newArr) {
|
||||
logrus.Debugf("The endpoints is not modify: %s", e.Name())
|
||||
return
|
||||
}
|
||||
|
||||
e.sortedEndpoints = newArr
|
||||
|
||||
scrape := e.toScrape()
|
||||
e.Prometheus.UpdateScrape(scrape)
|
||||
}
|
||||
|
||||
func (e *App) Error(err error) {
|
||||
@ -53,9 +63,9 @@ func (e *App) Name() string {
|
||||
}
|
||||
|
||||
func (e *App) toScrape() *prometheus.ScrapeConfig {
|
||||
ts := make([]model.LabelSet, 0, len(e.sortedEndpoints))
|
||||
ts := make([]string, 0, len(e.sortedEndpoints))
|
||||
for _, end := range e.sortedEndpoints {
|
||||
ts = append(ts, model.LabelSet{model.AddressLabel: model.LabelValue(end)})
|
||||
ts = append(ts, end)
|
||||
}
|
||||
|
||||
return &prometheus.ScrapeConfig{
|
||||
@ -69,7 +79,7 @@ func (e *App) toScrape() *prometheus.ScrapeConfig {
|
||||
{
|
||||
Targets: ts,
|
||||
Labels: map[model.LabelName]model.LabelValue{
|
||||
"component": "acp_entrance",
|
||||
"component": model.LabelValue(e.Name()),
|
||||
},
|
||||
},
|
||||
},
|
||||
@ -104,7 +114,8 @@ func (e *App) Modify(event *watch.Event) {
|
||||
|
||||
func (e *App) Delete(event *watch.Event) {
|
||||
for i, end := range e.endpoints {
|
||||
if end.URL == event.GetValueString() {
|
||||
url := gjson.Get(event.GetValueString(), "external_ip").String() + ":6100"
|
||||
if end.URL == url {
|
||||
e.endpoints = append(e.endpoints[:i], e.endpoints[i+1:]...)
|
||||
e.UpdateEndpoints(e.endpoints...)
|
||||
break
|
||||
|
@ -27,6 +27,7 @@ import (
|
||||
"github.com/prometheus/common/model"
|
||||
"time"
|
||||
"strings"
|
||||
"github.com/tidwall/gjson"
|
||||
)
|
||||
|
||||
// AppStatus holds app performance data, which comes from the worker elected as leader, i.e. the node where app_sync_runtime_server runs.
|
||||
@ -38,6 +39,11 @@ type AppStatus struct {
|
||||
}
|
||||
|
||||
func (e *AppStatus) UpdateEndpoints(endpoints ...*config.Endpoint) {
|
||||
// Endpoints registered through the v3 API are stored as JSON, so extract the address first.
|
||||
for i, end := range endpoints {
|
||||
endpoints[i].URL = gjson.Get(end.URL, "Addr").String()
|
||||
}
|
||||
|
||||
newArr := utils.TrimAndSort(endpoints)
|
||||
|
||||
// change port
|
||||
@ -46,6 +52,7 @@ func (e *AppStatus) UpdateEndpoints(endpoints ...*config.Endpoint) {
|
||||
}
|
||||
|
||||
if utils.ArrCompare(e.sortedEndpoints, newArr) {
|
||||
logrus.Debugf("The endpoints is not modify: %s", e.Name())
|
||||
return
|
||||
}
|
||||
|
||||
@ -64,14 +71,14 @@ func (e *AppStatus) Name() string {
|
||||
}
|
||||
|
||||
func (e *AppStatus) toScrape() *prometheus.ScrapeConfig {
|
||||
ts := make([]model.LabelSet, 0, len(e.sortedEndpoints))
|
||||
ts := make([]string, 0, len(e.sortedEndpoints))
|
||||
for _, end := range e.sortedEndpoints {
|
||||
ts = append(ts, model.LabelSet{model.AddressLabel: model.LabelValue(end)})
|
||||
ts = append(ts, end)
|
||||
}
|
||||
|
||||
return &prometheus.ScrapeConfig{
|
||||
JobName: e.Name(),
|
||||
ScrapeInterval: model.Duration(5 * time.Minute),
|
||||
ScrapeInterval: model.Duration(time.Minute),
|
||||
ScrapeTimeout: model.Duration(30 * time.Second),
|
||||
MetricsPath: "/metrics",
|
||||
HonorLabels: true,
|
||||
@ -80,7 +87,7 @@ func (e *AppStatus) toScrape() *prometheus.ScrapeConfig {
|
||||
{
|
||||
Targets: ts,
|
||||
Labels: map[model.LabelName]model.LabelValue{
|
||||
"component": "acp_entrance",
|
||||
"component": model.LabelValue(e.Name()),
|
||||
},
|
||||
},
|
||||
},
|
||||
|
@ -38,6 +38,7 @@ func (e *Entrance) UpdateEndpoints(endpoints ...*config.Endpoint) {
|
||||
newArr := utils.TrimAndSort(endpoints)
|
||||
|
||||
if utils.ArrCompare(e.sortedEndpoints, newArr) {
|
||||
logrus.Debugf("The endpoints is not modify: %s", e.Name())
|
||||
return
|
||||
}
|
||||
|
||||
@ -56,9 +57,9 @@ func (e *Entrance) Name() string {
|
||||
}
|
||||
|
||||
func (e *Entrance) toScrape() *prometheus.ScrapeConfig {
|
||||
ts := make([]model.LabelSet, 0, len(e.sortedEndpoints))
|
||||
ts := make([]string, 0, len(e.sortedEndpoints))
|
||||
for _, end := range e.sortedEndpoints {
|
||||
ts = append(ts, model.LabelSet{model.AddressLabel: model.LabelValue(end)})
|
||||
ts = append(ts, end)
|
||||
}
|
||||
|
||||
return &prometheus.ScrapeConfig{
|
||||
@ -72,7 +73,7 @@ func (e *Entrance) toScrape() *prometheus.ScrapeConfig {
|
||||
{
|
||||
Targets: ts,
|
||||
Labels: map[model.LabelName]model.LabelValue{
|
||||
"component": "acp_entrance",
|
||||
"component": model.LabelValue(e.Name()),
|
||||
},
|
||||
},
|
||||
},
|
||||
|
@ -38,6 +38,7 @@ func (e *Etcd) UpdateEndpoints(endpoints ...*config.Endpoint) {
|
||||
newArr := utils.TrimAndSort(endpoints)
|
||||
|
||||
if utils.ArrCompare(e.sortedEndpoints, newArr) {
|
||||
logrus.Debugf("The endpoints is not modify: %s", e.Name())
|
||||
return
|
||||
}
|
||||
|
||||
@ -56,9 +57,9 @@ func (e *Etcd) Name() string {
|
||||
}
|
||||
|
||||
func (e *Etcd) toScrape() *prometheus.ScrapeConfig {
|
||||
ts := make([]model.LabelSet, 0, len(e.sortedEndpoints))
|
||||
ts := make([]string, 0, len(e.sortedEndpoints))
|
||||
for _, end := range e.sortedEndpoints {
|
||||
ts = append(ts, model.LabelSet{model.AddressLabel: model.LabelValue(end)})
|
||||
ts = append(ts, end)
|
||||
}
|
||||
|
||||
return &prometheus.ScrapeConfig{
|
||||
@ -71,7 +72,7 @@ func (e *Etcd) toScrape() *prometheus.ScrapeConfig {
|
||||
{
|
||||
Targets: ts,
|
||||
Labels: map[model.LabelName]model.LabelValue{
|
||||
"component": "acp_entrance",
|
||||
"component": model.LabelValue(e.Name()),
|
||||
},
|
||||
},
|
||||
},
|
||||
|
@ -38,6 +38,7 @@ func (e *EventLog) UpdateEndpoints(endpoints ...*config.Endpoint) {
|
||||
newArr := utils.TrimAndSort(endpoints)
|
||||
|
||||
if utils.ArrCompare(e.sortedEndpoints, newArr) {
|
||||
logrus.Debugf("The endpoints is not modify: %s", e.Name())
|
||||
return
|
||||
}
|
||||
|
||||
@ -56,9 +57,9 @@ func (e *EventLog) Name() string {
|
||||
}
|
||||
|
||||
func (e *EventLog) toScrape() *prometheus.ScrapeConfig {
|
||||
ts := make([]model.LabelSet, 0, len(e.sortedEndpoints))
|
||||
ts := make([]string, 0, len(e.sortedEndpoints))
|
||||
for _, end := range e.sortedEndpoints {
|
||||
ts = append(ts, model.LabelSet{model.AddressLabel: model.LabelValue(end)})
|
||||
ts = append(ts, end)
|
||||
}
|
||||
|
||||
return &prometheus.ScrapeConfig{
|
||||
@ -72,7 +73,7 @@ func (e *EventLog) toScrape() *prometheus.ScrapeConfig {
|
||||
{
|
||||
Targets: ts,
|
||||
Labels: map[model.LabelName]model.LabelValue{
|
||||
"component": "acp_event_log",
|
||||
"component": model.LabelValue(e.Name()),
|
||||
},
|
||||
},
|
||||
},
|
||||
|
@ -42,6 +42,7 @@ func (e *Node) UpdateEndpoints(endpoints ...*config.Endpoint) {
|
||||
newArr := utils.TrimAndSort(endpoints)
|
||||
|
||||
if utils.ArrCompare(e.sortedEndpoints, newArr) {
|
||||
logrus.Debugf("The endpoints is not modify: %s", e.Name())
|
||||
return
|
||||
}
|
||||
|
||||
@ -60,9 +61,9 @@ func (e *Node) Name() string {
|
||||
}
|
||||
|
||||
func (e *Node) toScrape() *prometheus.ScrapeConfig {
|
||||
ts := make([]model.LabelSet, 0, len(e.sortedEndpoints))
|
||||
ts := make([]string, 0, len(e.sortedEndpoints))
|
||||
for _, end := range e.sortedEndpoints {
|
||||
ts = append(ts, model.LabelSet{model.AddressLabel: model.LabelValue(end)})
|
||||
ts = append(ts, end)
|
||||
}
|
||||
|
||||
return &prometheus.ScrapeConfig{
|
||||
@ -76,7 +77,7 @@ func (e *Node) toScrape() *prometheus.ScrapeConfig {
|
||||
{
|
||||
Targets: ts,
|
||||
Labels: map[model.LabelName]model.LabelValue{
|
||||
"component": "acp_entrance",
|
||||
"component": model.LabelValue(e.Name()),
|
||||
},
|
||||
},
|
||||
},
|
||||
@ -111,7 +112,8 @@ func (e *Node) Modify(event *watch.Event) {
|
||||
|
||||
func (e *Node) Delete(event *watch.Event) {
|
||||
for i, end := range e.endpoints {
|
||||
if end.URL == event.GetValueString() {
|
||||
url := gjson.Get(event.GetValueString(), "external_ip").String() + ":6100"
|
||||
if end.URL == url {
|
||||
e.endpoints = append(e.endpoints[:i], e.endpoints[i+1:]...)
|
||||
e.UpdateEndpoints(e.endpoints...)
|
||||
break
|
||||
|
@ -26,6 +26,7 @@ import (
|
||||
"github.com/goodrain/rainbond/monitor/utils"
|
||||
"github.com/prometheus/common/model"
|
||||
"time"
|
||||
"github.com/tidwall/gjson"
|
||||
)
|
||||
|
||||
// Prometheus holds the runtime metrics of prometheus itself, which are read from prometheus's own API.
|
||||
@ -36,9 +37,15 @@ type Prometheus struct {
|
||||
}
|
||||
|
||||
func (e *Prometheus) UpdateEndpoints(endpoints ...*config.Endpoint) {
|
||||
// Endpoints registered through the v3 API are stored as JSON, so extract the address first.
|
||||
for i, end := range endpoints {
|
||||
endpoints[i].URL = gjson.Get(end.URL, "Addr").String()
|
||||
}
|
||||
|
||||
newArr := utils.TrimAndSort(endpoints)
|
||||
|
||||
if utils.ArrCompare(e.sortedEndpoints, newArr) {
|
||||
logrus.Debugf("The endpoints is not modify: %s", e.Name())
|
||||
return
|
||||
}
|
||||
|
||||
@ -57,14 +64,14 @@ func (e *Prometheus) Name() string {
|
||||
}
|
||||
|
||||
func (e *Prometheus) toScrape() *prometheus.ScrapeConfig {
|
||||
ts := make([]model.LabelSet, 0, len(e.sortedEndpoints))
|
||||
ts := make([]string, 0, len(e.sortedEndpoints))
|
||||
for _, end := range e.sortedEndpoints {
|
||||
ts = append(ts, model.LabelSet{model.AddressLabel: model.LabelValue(end)})
|
||||
ts = append(ts, end)
|
||||
}
|
||||
|
||||
return &prometheus.ScrapeConfig{
|
||||
JobName: e.Name(),
|
||||
ScrapeInterval: model.Duration(5 * time.Minute),
|
||||
ScrapeInterval: model.Duration(time.Minute),
|
||||
ScrapeTimeout: model.Duration(30 * time.Second),
|
||||
MetricsPath: "/metrics",
|
||||
HonorLabels: true,
|
||||
@ -73,7 +80,7 @@ func (e *Prometheus) toScrape() *prometheus.ScrapeConfig {
|
||||
{
|
||||
Targets: ts,
|
||||
Labels: map[model.LabelName]model.LabelValue{
|
||||
"component": "acp_entrance",
|
||||
"component": model.LabelValue(e.Name()),
|
||||
},
|
||||
},
|
||||
},
|
||||
|
@ -32,8 +32,8 @@ import (
|
||||
"os"
|
||||
"syscall"
|
||||
"os/signal"
|
||||
"github.com/goodrain/rainbond/monitor/prometheus"
|
||||
"github.com/tidwall/gjson"
|
||||
"github.com/goodrain/rainbond/monitor/prometheus"
|
||||
)
|
||||
|
||||
type Monitor struct {
|
||||
@ -42,50 +42,25 @@ type Monitor struct {
|
||||
cancel context.CancelFunc
|
||||
client *v3.Client
|
||||
timeout time.Duration
|
||||
stopperList []chan bool
|
||||
|
||||
manager *prometheus.Manager
|
||||
discover1 discover1.Discover
|
||||
discover3 discover3.Discover
|
||||
}
|
||||
|
||||
func (d *Monitor) Start() {
|
||||
// create prometheus manager
|
||||
p := prometheus.NewManager(d.config)
|
||||
// start prometheus daemon and watching tis status in all time, exit monitor process if start failed
|
||||
p.StartDaemon(d.GetStopper())
|
||||
|
||||
d.discover1.AddProject("event_log_event_grpc", &callback.EventLog{Prometheus: p})
|
||||
d.discover1.AddProject("acp_entrance", &callback.Entrance{Prometheus: p})
|
||||
d.discover3.AddProject("app_sync_runtime_server", &callback.AppStatus{Prometheus: p})
|
||||
d.discover1.AddProject("event_log_event_http", &callback.EventLog{Prometheus: d.manager})
|
||||
d.discover1.AddProject("acp_entrance", &callback.Entrance{Prometheus: d.manager})
|
||||
d.discover3.AddProject("app_sync_runtime_server", &callback.AppStatus{Prometheus: d.manager})
|
||||
d.discover3.AddProject("prometheus", &callback.Prometheus{Prometheus: d.manager})
|
||||
|
||||
// node and app runtime metrics needs to be monitored separately
|
||||
go d.discoverNodes(&callback.Node{Prometheus: p}, &callback.App{Prometheus: p}, d.GetStopper())
|
||||
go d.discoverNodes(&callback.Node{Prometheus: d.manager}, &callback.App{Prometheus: d.manager}, d.ctx.Done())
|
||||
|
||||
d.listenStop()
|
||||
// monitor etcd members
|
||||
go d.discoverEtcd(&callback.Etcd{Prometheus: d.manager}, d.ctx.Done())
|
||||
}
|
||||
|
||||
func (d *Monitor) discoverNodes(node *callback.Node, app *callback.App, done chan bool) {
|
||||
// get all exist nodes by etcd
|
||||
resp, err := d.client.Get(d.ctx, "/rainbond/nodes/", v3.WithPrefix())
|
||||
if err != nil {
|
||||
logrus.Error("failed to get all nodes: ", err)
|
||||
return
|
||||
}
|
||||
|
||||
for _, kv := range resp.Kvs {
|
||||
url := gjson.GetBytes(kv.Value, "external_ip").String() + ":6100"
|
||||
end := &config.Endpoint{
|
||||
URL: url,
|
||||
}
|
||||
|
||||
node.AddEndpoint(end)
|
||||
|
||||
isSlave := gjson.GetBytes(kv.Value, "labels.rainbond_node_rule_compute").String()
|
||||
if isSlave == "true" {
|
||||
app.AddEndpoint(end)
|
||||
}
|
||||
}
|
||||
|
||||
func (d *Monitor) discoverNodes(node *callback.Node, app *callback.App, done <-chan struct{}) {
|
||||
// start listen node modified
|
||||
watcher := watch.New(d.client, "")
|
||||
w, err := watcher.WatchList(d.ctx, "/rainbond/nodes", "")
|
||||
@ -130,7 +105,6 @@ func (d *Monitor) discoverNodes(node *callback.Node, app *callback.App, done cha
|
||||
}
|
||||
case <-done:
|
||||
logrus.Info("stop discover nodes because received stop signal.")
|
||||
close(done)
|
||||
return
|
||||
}
|
||||
|
||||
@ -138,13 +112,12 @@ func (d *Monitor) discoverNodes(node *callback.Node, app *callback.App, done cha
|
||||
|
||||
}
|
||||
|
||||
func (d *Monitor) discoverEtcd(e *callback.Etcd, done chan bool) {
|
||||
t := time.Tick(time.Second * 5)
|
||||
func (d *Monitor) discoverEtcd(e *callback.Etcd, done <-chan struct{}) {
|
||||
t := time.Tick(time.Minute)
|
||||
for {
|
||||
select {
|
||||
case <-done:
|
||||
logrus.Info("stop discover etcd because received stop signal.")
|
||||
close(done)
|
||||
return
|
||||
case <-t:
|
||||
resp, err := d.client.MemberList(d.ctx)
|
||||
@ -168,39 +141,25 @@ func (d *Monitor) discoverEtcd(e *callback.Etcd, done chan bool) {
|
||||
}
|
||||
|
||||
func (d *Monitor) Stop() {
|
||||
logrus.Info("Stop all child process for monitor.")
|
||||
for _, ch := range d.stopperList {
|
||||
ch <- true
|
||||
}
|
||||
|
||||
logrus.Info("Stopping all child process for monitor")
|
||||
d.cancel()
|
||||
d.discover1.Stop()
|
||||
d.discover3.Stop()
|
||||
d.client.Close()
|
||||
d.cancel()
|
||||
|
||||
time.Sleep(time.Second)
|
||||
}
|
||||
|
||||
func (d *Monitor) GetStopper() chan bool {
|
||||
ch := make(chan bool, 1)
|
||||
d.stopperList = append(d.stopperList, ch)
|
||||
|
||||
return ch
|
||||
}
|
||||
|
||||
func (d *Monitor) listenStop() {
|
||||
func (d *Monitor) ListenStop() {
|
||||
sigs := make(chan os.Signal, 1)
|
||||
signal.Notify(sigs, syscall.SIGKILL, syscall.SIGINT, syscall.SIGTERM)
|
||||
|
||||
sig := <- sigs
|
||||
signal.Ignore(syscall.SIGKILL, syscall.SIGINT, syscall.SIGTERM)
|
||||
|
||||
logrus.Warn("monitor manager received signal: ", sig.String())
|
||||
close(sigs)
|
||||
logrus.Warn("monitor manager received signal: ", sig)
|
||||
d.Stop()
|
||||
}
|
||||
|
||||
func NewMonitor(opt *option.Config) *Monitor {
|
||||
func NewMonitor(opt *option.Config, p *prometheus.Manager) *Monitor {
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
defaultTimeout := time.Second * 3
|
||||
|
||||
@ -230,6 +189,7 @@ func NewMonitor(opt *option.Config) *Monitor {
|
||||
config: opt,
|
||||
ctx: ctx,
|
||||
cancel: cancel,
|
||||
manager: p,
|
||||
client: cli,
|
||||
discover1: dc1,
|
||||
discover3: dc3,
|
||||
|
@ -186,12 +186,12 @@ type ServiceDiscoveryConfig struct {
|
||||
type Group struct {
|
||||
// Targets is a list of targets identified by a label set. Each target is
|
||||
// uniquely identifiable in the group by its address label.
|
||||
Targets []model.LabelSet
|
||||
Targets []string `yaml:"targets,flow"`
|
||||
// Labels is a set of labels that is common across all targets in the group.
|
||||
Labels model.LabelSet
|
||||
Labels model.LabelSet `yaml:"labels,omitempty"`
|
||||
|
||||
// Source is an identifier that describes a group of targets.
|
||||
Source string
|
||||
Source string `yaml:"source,omitempty"`
|
||||
}
|
||||
|
||||
// Regexp encapsulates a regexp.Regexp and makes it YAML marshallable.
|
||||
|
@ -19,23 +19,25 @@
|
||||
package prometheus
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"github.com/Sirupsen/logrus"
|
||||
"github.com/goodrain/rainbond/cmd/monitor/option"
|
||||
discover3 "github.com/goodrain/rainbond/discover.v2"
|
||||
"gopkg.in/yaml.v2"
|
||||
"io/ioutil"
|
||||
"net/http"
|
||||
"os/exec"
|
||||
"sync"
|
||||
"time"
|
||||
discover3 "github.com/goodrain/rainbond/discover.v2"
|
||||
"fmt"
|
||||
"os"
|
||||
"syscall"
|
||||
)
|
||||
|
||||
type Manager struct {
|
||||
ApiUrl string
|
||||
Opt *option.Config
|
||||
Config *Config
|
||||
Reg *discover3.KeepAlive
|
||||
Process *os.Process
|
||||
Registry *discover3.KeepAlive
|
||||
httpClient *http.Client
|
||||
l *sync.Mutex
|
||||
}
|
||||
@ -50,17 +52,75 @@ func NewManager(config *option.Config) *Manager {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
return &Manager{
|
||||
ApiUrl: fmt.Sprintf("http://127.0.0.1:%s", config.Port),
|
||||
m := &Manager{
|
||||
ApiUrl: fmt.Sprintf("http://127.0.0.1:%d", config.Port),
|
||||
Opt: config,
|
||||
Config: &Config{},
|
||||
Reg: reg,
|
||||
Registry: reg,
|
||||
httpClient: client,
|
||||
l: &sync.Mutex{},
|
||||
}
|
||||
m.LoadConfig()
|
||||
|
||||
return m
|
||||
}
|
||||
|
||||
func (p *Manager) StartDaemon() {
|
||||
logrus.Info("Start daemon for prometheus.")
|
||||
|
||||
procAttr := &os.ProcAttr{
|
||||
Files: []*os.File{os.Stdin, os.Stdout, os.Stderr},
|
||||
}
|
||||
process, err := os.StartProcess("/bin/prometheus", p.Opt.Args, procAttr)
|
||||
if err != nil {
|
||||
if err != nil {
|
||||
logrus.Error("Can not start prometheus daemon: ", err)
|
||||
os.Exit(11)
|
||||
}
|
||||
}
|
||||
p.Process = process
|
||||
|
||||
// waiting started
|
||||
for i := 0; i < 15; i++ {
|
||||
time.Sleep(time.Second)
|
||||
if _, err = os.FindProcess(process.Pid); err == nil {
|
||||
logrus.Info("The prometheus daemon is started.")
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
logrus.Error("Timeout start prometheus daemon: ", err)
|
||||
os.Exit(13)
|
||||
}
|
||||
|
||||
func (p *Manager) StopDaemon() {
|
||||
logrus.Info("Stopping prometheus daemon ...")
|
||||
//exec.Command("sh", "-c", "kill `pgrep prometheus` ; while pgrep prometheus; do sleep 1; done").Run()
|
||||
p.Process.Kill()
|
||||
p.Process.Wait()
|
||||
logrus.Info("Stopped prometheus daemon.")
|
||||
}
|
||||
|
||||
func (p *Manager) RestartDaemon() error {
|
||||
logrus.Debug("Restart daemon for prometheus.")
|
||||
//request, err := http.NewRequest("POST", p.ApiUrl+"/-/reload", nil)
|
||||
//if err != nil {
|
||||
// logrus.Error("Create request to load config error: ", err)
|
||||
// return err
|
||||
//}
|
||||
//
|
||||
//_, err = p.httpClient.Do(request)
|
||||
|
||||
if err := p.Process.Signal(syscall.SIGHUP); err != nil {
|
||||
logrus.Error("Failed to restart daemon for prometheus: ", err)
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (p *Manager) LoadConfig() error {
|
||||
logrus.Info("Load prometheus config file.")
|
||||
context, err := ioutil.ReadFile(p.Opt.ConfigFile)
|
||||
if err != nil {
|
||||
logrus.Error("Failed to read prometheus config file: ", err)
|
||||
@ -71,11 +131,13 @@ func (p *Manager) LoadConfig() error {
|
||||
logrus.Error("Unmarshal prometheus config string to object error.", err.Error())
|
||||
return err
|
||||
}
|
||||
logrus.Debugf("Loaded config file to memory: %+v", p.Config) //TODO
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (p *Manager) SaveConfig() error {
|
||||
logrus.Debug("Save prometheus config file.")
|
||||
data, err := yaml.Marshal(p.Config)
|
||||
if err != nil {
|
||||
logrus.Error("Marshal prometheus config to yaml error.", err.Error())
|
||||
@ -91,72 +153,25 @@ func (p *Manager) SaveConfig() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (p *Manager) StartDaemon(done chan bool) {
|
||||
cmd := "which prometheus && " +
|
||||
"prometheus " +
|
||||
"--web.listen-address=:%s " +
|
||||
"--storage.tsdb.path=/prometheusdata " +
|
||||
"--storage.tsdb.retention=7d " +
|
||||
"--config.file=%s &"
|
||||
|
||||
cmd = fmt.Sprintf(cmd, p.Opt.Port, p.Opt.ConfigFile)
|
||||
|
||||
err := exec.Command("sh", "-c", cmd).Run()
|
||||
if err != nil {
|
||||
logrus.Error("Can not start prometheus daemon: ", err)
|
||||
panic(err)
|
||||
}
|
||||
|
||||
p.Reg.Start()
|
||||
defer p.Reg.Stop()
|
||||
|
||||
t := time.Tick(time.Second * 5)
|
||||
for {
|
||||
select {
|
||||
case <-done:
|
||||
exec.Command("sh", "-c", "kill `pgrep prometheus`").Run()
|
||||
return
|
||||
case <-t:
|
||||
err := exec.Command("sh", "-c", "pgrep prometheus").Run()
|
||||
if err != nil {
|
||||
logrus.Error("the prometheus process is exited, ready to restart it.")
|
||||
err := exec.Command("sh", "-c", cmd).Run()
|
||||
if err == nil {
|
||||
logrus.Error("Failed to restart the prometheus daemon: ", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
func (p *Manager) RestartDaemon() error {
|
||||
request, err := http.NewRequest("POST", p.ApiUrl+"/-/reload", nil)
|
||||
if err != nil {
|
||||
logrus.Error("Create request to load config error: ", err)
|
||||
return err
|
||||
}
|
||||
|
||||
_, err = p.httpClient.Do(request)
|
||||
if err != nil {
|
||||
logrus.Error("load config error: ", err)
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (p *Manager) UpdateScrape(scrape *ScrapeConfig) {
|
||||
logrus.Debugf("update scrape: %+v", scrape)
|
||||
|
||||
p.l.Lock()
|
||||
defer p.l.Unlock()
|
||||
|
||||
exist := false
|
||||
for i, s := range p.Config.ScrapeConfigs {
|
||||
if s.JobName == scrape.JobName {
|
||||
p.Config.ScrapeConfigs[i] = scrape
|
||||
exist = true
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if !exist {
|
||||
p.Config.ScrapeConfigs = append(p.Config.ScrapeConfigs, scrape)
|
||||
}
|
||||
|
||||
p.SaveConfig()
|
||||
p.RestartDaemon()
|
||||
}
|
||||
|
@ -57,7 +57,8 @@ function build::image() {
|
||||
elif [ "$1" = "chaos" ];then
|
||||
docker run --rm -v `pwd`:${WORK_DIR} -w ${WORK_DIR} -it golang:1.8.3 go build -ldflags '-w -s' -o ${DOCKER_PATH}/${BASE_NAME}-$1 ./cmd/builder
|
||||
elif [ "$1" = "monitor" ];then
|
||||
GOOS=linux go build -ldflags '-w -s' -o ${DOCKER_PATH}/${BASE_NAME}-$1 ./cmd/builder
|
||||
docker run --rm -v `pwd`:${WORK_DIR} -w ${WORK_DIR} -it golang:1.8.3 go build -ldflags "-w -s -extldflags '-static'" -tags 'netgo static_build' -o ${DOCKER_PATH}/${BASE_NAME}-$1 ./cmd/$1
|
||||
#go build -ldflags "-w -s -extldflags '-static'" -tags 'netgo static_build' -o ${DOCKER_PATH}/${BASE_NAME}-$1 ./cmd/monitor
|
||||
else
|
||||
docker run --rm -v `pwd`:${WORK_DIR} -w ${WORK_DIR} -it golang:1.8.3 go build -ldflags '-w -s' -o ${DOCKER_PATH}/${BASE_NAME}-$1 ./cmd/$1
|
||||
fi
|
||||
|
Loading…
Reference in New Issue
Block a user