diff --git a/api/controller/websocket.go b/api/controller/websocket.go index cd9596334..571b347f0 100644 --- a/api/controller/websocket.go +++ b/api/controller/websocket.go @@ -36,7 +36,7 @@ type DockerConsole struct { } var defaultDockerConsoleEndpoints = []string{"127.0.0.1:7171"} -var defaultEventLogEndpoints = []string{"127.0.0.1:6363"} +var defaultEventLogEndpoints = []string{"local=>127.0.0.1:6363"} var defaultEtcdEndpoints = []string{"127.0.0.1:2379"} var dockerConsole *DockerConsole diff --git a/api/discover/discover.go b/api/discover/discover.go index 82fee97bf..19f76af28 100644 --- a/api/discover/discover.go +++ b/api/discover/discover.go @@ -104,7 +104,7 @@ func (e *defalt) UpdateEndpoints(endpoints ...*corediscoverconfig.Endpoint) { var endStr []string for _, end := range endpoints { if end.URL != "" { - endStr = append(endStr, end.URL) + endStr = append(endStr, end.Name + "=>" + end.URL) } } logrus.Debugf("endstr is %v, name is %v", endStr, e.name) diff --git a/api/handler/proxy.go b/api/handler/proxy.go index 83b0b8622..dc5011501 100644 --- a/api/handler/proxy.go +++ b/api/handler/proxy.go @@ -27,6 +27,7 @@ import ( var nodeProxy proxy.Proxy var builderProxy proxy.Proxy var prometheusProxy proxy.Proxy +var monitorProxy proxy.Proxy //InitProxy 初始化 func InitProxy(conf option.Config) { @@ -42,6 +43,10 @@ func InitProxy(conf option.Config) { prometheusProxy = proxy.CreateProxy("prometheus", "http", []string{"127.0.0.1:9999"}) discover.GetEndpointDiscover(conf.EtcdEndpoint).AddProject("prometheus", prometheusProxy) } + if monitorProxy == nil { + monitorProxy = proxy.CreateProxy("monitor", "http", []string{"127.0.0.1:3329"}) + discover.GetEndpointDiscover(conf.EtcdEndpoint).AddProject("monitor", monitorProxy) + } } @@ -59,3 +64,8 @@ func GetBuilderProxy() proxy.Proxy { func GetPrometheusProxy() proxy.Proxy { return prometheusProxy } + +//GetMonitorProxy GetMonitorProxy +func GetMonitorProxy() proxy.Proxy { + return monitorProxy +} \ No newline at end of 
file diff --git a/api/middleware/mideware.go b/api/middleware/mideware.go index e35f0ba3e..9b24ef388 100644 --- a/api/middleware/mideware.go +++ b/api/middleware/mideware.go @@ -162,6 +162,10 @@ func Proxy(next http.Handler) http.Handler { handler.GetNodeProxy().Proxy(w, r) return } + if strings.HasPrefix(r.RequestURI, "/v2/rules") { + handler.GetMonitorProxy().Proxy(w, r) + return + } next.ServeHTTP(w, r) } return http.HandlerFunc(fn) diff --git a/api/proxy/http_proxy.go b/api/proxy/http_proxy.go index c7a5656e4..ca3eb86b4 100644 --- a/api/proxy/http_proxy.go +++ b/api/proxy/http_proxy.go @@ -45,7 +45,15 @@ func (h *HTTPProxy) Proxy(w http.ResponseWriter, r *http.Request) { //UpdateEndpoints 更新端点 func (h *HTTPProxy) UpdateEndpoints(endpoints ...string) { - h.endpoints = CreateEndpoints(endpoints) + ends := []string{} + for _, end := range endpoints { + if kv := strings.Split(end, "=>"); len(kv) > 1 { + ends = append(ends, kv[1]) + } else { + ends = append(ends, end) + } + } + h.endpoints = CreateEndpoints(ends) } //Do do proxy @@ -60,5 +68,13 @@ func (h *HTTPProxy) Do(r *http.Request) (*http.Response, error) { } func createHTTPProxy(name string, endpoints []string) *HTTPProxy { - return &HTTPProxy{name, CreateEndpoints(endpoints), NewRoundRobin()} + ends := []string{} + for _, end := range endpoints { + if kv := strings.Split(end, "=>"); len(kv) > 1 { + ends = append(ends, kv[1]) + } else { + ends = append(ends, end) + } + } + return &HTTPProxy{name, CreateEndpoints(ends), NewRoundRobin()} } diff --git a/api/proxy/lb.go b/api/proxy/lb.go index 627d676d9..0f87bea01 100644 --- a/api/proxy/lb.go +++ b/api/proxy/lb.go @@ -19,12 +19,9 @@ package proxy import ( - "io/ioutil" "net/http" "strings" "sync/atomic" - - "github.com/Sirupsen/logrus" ) // RoundRobin round robin loadBalance impl @@ -44,6 +41,20 @@ func (e Endpoint) String() string { return string(e) } +func (e Endpoint) GetName() string { + if kv := strings.Split(string(e), "=>"); len(kv) > 1 { + return kv[0] + } 
+ return string(e) +} + +func (e Endpoint) GetAddr() string { + if kv := strings.Split(string(e), "=>"); len(kv) > 1 { + return kv[1] + } + return string(e) +} + //EndpointList EndpointList type EndpointList []Endpoint @@ -122,39 +133,34 @@ type SelectBalance struct { //NewSelectBalance 创建选择性负载均衡 func NewSelectBalance() *SelectBalance { - body, err := ioutil.ReadFile("/etc/goodrain/host_id_list.conf") - if err != nil { - logrus.Error("read host id list error,", err.Error()) - } - sb := &SelectBalance{ + return &SelectBalance{ hostIDMap: map[string]string{"local": "127.0.0.1:6363"}, } - if body != nil && len(body) > 0 { - listStr := string(body) - hosts := strings.Split(strings.TrimSpace(listStr), ";") - for _, h := range hosts { - info := strings.Split(strings.Trim(h, "\r\n"), "=") - if len(info) == 2 { - sb.hostIDMap[info[0]] = info[1] - } - } - } - logrus.Info("Docker log websocket server endpoints:", sb.hostIDMap) - return sb } //Select 负载 func (s *SelectBalance) Select(r *http.Request, endpoints EndpointList) Endpoint { - if r.URL != nil { - hostID := r.URL.Query().Get("host_id") - if e, ok := s.hostIDMap[hostID]; ok { - if endpoints.HaveEndpoint(e) { - return Endpoint(e) - } + if r.URL == nil { + return Endpoint(s.hostIDMap["local"]) + } + + id2ip := map[string]string{"local": "127.0.0.1:6363"} + for _, end := range endpoints { + if kv := strings.Split(string(end), "=>"); len(kv) > 1 { + id2ip[kv[0]] = kv[1] } } + + if r.URL != nil { + hostID := r.URL.Query().Get("host_id") + if e, ok := id2ip[hostID]; ok { + return Endpoint(e) + } + } + if len(endpoints) > 0 { return endpoints[0] } + return Endpoint(s.hostIDMap["local"]) } diff --git a/api/proxy/proxy.go b/api/proxy/proxy.go index 91e05ef1b..a7e2ccf81 100644 --- a/api/proxy/proxy.go +++ b/api/proxy/proxy.go @@ -24,7 +24,7 @@ import "net/http" type Proxy interface { Proxy(w http.ResponseWriter, r *http.Request) Do(r *http.Request) (*http.Response, error) - UpdateEndpoints(endpoints ...string) + 
UpdateEndpoints(endpoints ...string) // format: ["name=>ip:port", ...] } //CreateProxy 创建代理 diff --git a/api/proxy/websocket_proxy.go b/api/proxy/websocket_proxy.go index 9cb56c211..99ee27f38 100644 --- a/api/proxy/websocket_proxy.go +++ b/api/proxy/websocket_proxy.go @@ -127,11 +127,12 @@ type WebSocketProxy struct { func (h *WebSocketProxy) Proxy(w http.ResponseWriter, req *http.Request) { endpoint := h.lb.Select(req, h.endpoints) + logrus.Info("Proxy webSocket to: ", endpoint) path := req.RequestURI if strings.Contains(path, "?") { path = path[:strings.Index(path, "?")] } - u := url.URL{Scheme: "ws", Host: endpoint.String(), Path: path} + u := url.URL{Scheme: "ws", Host: endpoint.GetAddr(), Path: path} logrus.Infof("connecting to %s", u.String()) // Pass headers from the incoming request to the dialer to forward them to // the final destinations. diff --git a/api/region/monitor.go b/api/region/monitor.go new file mode 100644 index 000000000..31d62eada --- /dev/null +++ b/api/region/monitor.go @@ -0,0 +1,162 @@ +// RAINBOND, Application Management Platform +// Copyright (C) 2014-2017 Goodrain Co., Ltd. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. For any non-GPL usage of Rainbond, +// one or multiple Commercial Licenses authorized by Goodrain Co., Ltd. +// must be obtained first. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with this program. If not, see <http://www.gnu.org/licenses/>.
+ +package region + +import ( + "github.com/goodrain/rainbond/api/util" + "github.com/goodrain/rainbond/node/api/model" + utilhttp "github.com/goodrain/rainbond/util/http" + "fmt" + "encoding/json" + "bytes" + "os" + "errors" + "io/ioutil" + "github.com/Sirupsen/logrus" + "gopkg.in/yaml.v2" +) + +//MonitorInterface monitor alerting rules api +type MonitorInterface interface { + GetRule(name string) (*model.AlertingNameConfig, *util.APIHandleError) + GetAllRule() (*model.AlertingRulesConfig, *util.APIHandleError) + DelRule(name string) (*utilhttp.ResponseBody, *util.APIHandleError) + AddRule(path string) (*utilhttp.ResponseBody, *util.APIHandleError) + RegRule(ruleName string, path string) (*utilhttp.ResponseBody, *util.APIHandleError) +} + +func (r *regionImpl) Monitor() MonitorInterface { + return &monitor{prefix: "/v2/rules", regionImpl: *r} +} + +type monitor struct { + regionImpl + prefix string +} + +func (m *monitor) GetRule(name string) (*model.AlertingNameConfig, *util.APIHandleError) { + var ac model.AlertingNameConfig + var decode utilhttp.ResponseBody + decode.Bean = &ac + code, err := m.DoRequest(m.prefix+"/"+name, "GET", nil, &decode) + if err != nil { + return nil, handleErrAndCode(err, code) + } + if code != 200 { + logrus.Error("Return failure message ", decode.Bean) + return nil, util.CreateAPIHandleError(code, fmt.Errorf("get alerting rules error code %d", code)) + } + return &ac, nil +} + +func (m *monitor) GetAllRule() (*model.AlertingRulesConfig, *util.APIHandleError) { + var ac model.AlertingRulesConfig + var decode utilhttp.ResponseBody + decode.Bean = &ac + code, err := m.DoRequest(m.prefix+"/all", "GET", nil, &decode) + if err != nil { + return nil, handleErrAndCode(err, code) + } + if code != 200 { + logrus.Error("Return failure message ", decode.Bean) + return nil, util.CreateAPIHandleError(code, fmt.Errorf("get alerting rules error code %d", code)) + } + return &ac, nil +} + +func (m *monitor) DelRule(name string) (*utilhttp.ResponseBody,
*util.APIHandleError) { + var decode utilhttp.ResponseBody + code, err := m.DoRequest(m.prefix+"/"+name, "DELETE", nil, &decode) + if err != nil { + return nil, handleErrAndCode(err, code) + } + if code != 200 { + logrus.Error("Return failure message ", decode.Bean) + return nil, util.CreateAPIHandleError(code, fmt.Errorf("del alerting rules error code %d", code)) + } + return &decode, nil +} + +func (m *monitor) AddRule(path string) (*utilhttp.ResponseBody, *util.APIHandleError) { + _, err := os.Stat(path) + if err!= nil || !os.IsExist(err){ + return nil, util.CreateAPIHandleError(400, errors.New("file does not exist")) + } + + content, err := ioutil.ReadFile(path) + if err != nil { + logrus.Error("Failed to read AlertingRules config file: ", err.Error()) + return nil, util.CreateAPIHandleError(400, err) + } + var rulesConfig model.AlertingNameConfig + if err := yaml.Unmarshal(content, &rulesConfig); err != nil { + logrus.Error("Unmarshal AlertingRulesConfig config string to object error.", err.Error()) + return nil, util.CreateAPIHandleError(400, err) + + } + var decode utilhttp.ResponseBody + body, err := json.Marshal(rulesConfig) + if err != nil { + return nil, util.CreateAPIHandleError(400, err) + } + code, err := m.DoRequest(m.prefix, "POST", bytes.NewBuffer(body), &decode) + if err != nil { + println("====err>",code,err) + return nil, handleErrAndCode(err, code) + } + if code != 200 { + logrus.Error("Return failure message ", decode.Bean) + return nil, util.CreateAPIHandleError(code, fmt.Errorf("add alerting rules error code %d", code)) + } + return &decode, nil +} + +func (m *monitor) RegRule(ruleName string, path string) (*utilhttp.ResponseBody, *util.APIHandleError) { + _, err := os.Stat(path) + if err!= nil || !os.IsExist(err){ + return nil, util.CreateAPIHandleError(400, errors.New("file does not exist")) + } + + content, err := ioutil.ReadFile(path) + if err != nil { + logrus.Error("Failed to read AlertingRules config file: ", err.Error()) + return 
nil, util.CreateAPIHandleError(400, err) + } + var rulesConfig model.AlertingNameConfig + if err := yaml.Unmarshal(content, &rulesConfig); err != nil { + logrus.Error("Unmarshal AlertingRulesConfig config string to object error.", err.Error()) + return nil, util.CreateAPIHandleError(400, err) + + } + var decode utilhttp.ResponseBody + body, err := json.Marshal(rulesConfig) + if err != nil { + return nil, util.CreateAPIHandleError(400, err) + } + code, err := m.DoRequest(m.prefix+"/"+ruleName, "PUT", bytes.NewBuffer(body), &decode) + if err != nil { + println("====err>",code,err) + return nil, handleErrAndCode(err, code) + } + if code != 200 { + logrus.Error("Return failure message ", decode.Bean) + return nil, util.CreateAPIHandleError(code, fmt.Errorf("add alerting rules error code %d", code)) + } + return &decode, nil +} diff --git a/api/region/region.go b/api/region/region.go index 2b3123c7f..622aa13fc 100644 --- a/api/region/region.go +++ b/api/region/region.go @@ -55,6 +55,7 @@ type Region interface { Cluster() ClusterInterface Configs() ConfigsInterface Version() string + Monitor() MonitorInterface DoRequest(path, method string, body io.Reader, decode *utilhttp.ResponseBody) (int, error) } diff --git a/builder/exector/build_from_sourcecode_run.go b/builder/exector/build_from_sourcecode_run.go index f9c48c1e7..9acbe53c4 100644 --- a/builder/exector/build_from_sourcecode_run.go +++ b/builder/exector/build_from_sourcecode_run.go @@ -285,9 +285,9 @@ func (i *SourceCodeBuildItem) buildImage() error { tag := i.DeployVersion buildImageName := strings.ToLower(fmt.Sprintf("%s/%s:%s", REGISTRYDOMAIN, name, tag)) args := make(map[string]string, 5) - for k, v := range args { - if strings.Contains(k, "BUILD_ARG_") { - args[k] = v + for k, v := range i.BuildEnvs { + if ks := strings.Split(k, "ARG_"); len(ks) > 1 { + args[ks[1]] = v } } buildOptions := types.ImageBuildOptions{ diff --git a/builder/parser/source_code.go b/builder/parser/source_code.go index 
3eb8f3cfc..b43c0757b 100644 --- a/builder/parser/source_code.go +++ b/builder/parser/source_code.go @@ -405,13 +405,15 @@ func (d *SourceCodeParse) parseDockerfileInfo(dockerfile string) bool { length := len(cm.Value) for i := 0; i < length; i++ { if kv := strings.Split(cm.Value[i], "="); len(kv) > 1 { - d.envs[kv[0]] = &Env{Name: kv[0], Value: kv[1]} + key := "BUILD_ARG_" + kv[0] + d.envs[key] = &Env{Name: key, Value: kv[1]} } else { if i + 1 >= length { logrus.Error("Parse ARG format error at ", cm.Value[1]) continue } - d.envs[cm.Value[i]] = &Env{Name: cm.Value[i], Value: cm.Value[i+1]} + key := "BUILD_ARG_" + cm.Value[i] + d.envs[key] = &Env{Name: key, Value: cm.Value[i+1]} i++ } } diff --git a/cmd/eventlog/server/server.go b/cmd/eventlog/server/server.go index 688c28e81..f686c1479 100644 --- a/cmd/eventlog/server/server.go +++ b/cmd/eventlog/server/server.go @@ -38,6 +38,7 @@ import ( "github.com/Sirupsen/logrus" "github.com/goodrain/rainbond/eventlog/db" "github.com/spf13/pflag" + "github.com/goodrain/rainbond/util" ) type LogServer struct { @@ -241,8 +242,15 @@ func (s *LogServer) Run() error { } defer udpkeepalive.Stop() + hostID, err := util.ReadHostID(s.Conf.Cluster.Discover.NodeIDFile) + if err != nil { + return err + } + + id := hostID[len(hostID)-12:] + httpkeepalive, err := discover.CreateKeepAlive(s.Conf.Cluster.Discover.EtcdAddr, "event_log_event_http", - s.Conf.Cluster.Discover.InstanceIP, s.Conf.Cluster.Discover.InstanceIP, s.Conf.WebSocket.BindPort) + id, s.Conf.Cluster.Discover.InstanceIP, s.Conf.WebSocket.BindPort) if err != nil { return err } diff --git a/cmd/monitor/main.go b/cmd/monitor/main.go index 0e5ba622e..27a67c03a 100644 --- a/cmd/monitor/main.go +++ b/cmd/monitor/main.go @@ -41,7 +41,7 @@ func main() { c.CompleteConfig() // start prometheus daemon and watching tis status in all time, exit monitor process if start failed - a := prometheus.NewRulesManager() + a := prometheus.NewRulesManager(c) p := prometheus.NewManager(c, a) 
controllerManager := controller.NewControllerManager(a,p) diff --git a/cmd/monitor/option/option.go b/cmd/monitor/option/option.go index b6872bf07..5d9cf93a8 100644 --- a/cmd/monitor/option/option.go +++ b/cmd/monitor/option/option.go @@ -38,6 +38,8 @@ type Config struct { StartArgs []string ConfigFile string + AlertingRulesFile string + AlertManagerUrl string LocalStoragePath string Web Web Tsdb Tsdb @@ -96,6 +98,8 @@ func NewConfig() *Config { LogLevel: "info", ConfigFile: "/etc/prometheus/prometheus.yml", + AlertingRulesFile: "/etc/prometheus/rules.yml", + AlertManagerUrl: "", LocalStoragePath: "/prometheusdata", WebTimeout: "5m", RemoteFlushDeadline: "1m", @@ -128,6 +132,10 @@ func (c *Config) AddFlag(cmd *pflag.FlagSet) { func (c *Config) AddPrometheusFlag(cmd *pflag.FlagSet) { cmd.StringVar(&c.ConfigFile, "config.file", c.ConfigFile, "Prometheus configuration file path.") + cmd.StringVar(&c.AlertManagerUrl, "alertmanager.url", c.AlertManagerUrl, "AlertManager url.") + + cmd.StringVar(&c.AlertingRulesFile, "rules-config.file", c.AlertingRulesFile, "Prometheus alerting rules config file path.") + cmd.StringVar(&c.Web.ListenAddress, "web.listen-address", c.Web.ListenAddress, "Address to listen on for UI, API, and telemetry.") cmd.StringVar(&c.WebTimeout, "web.read-timeout", c.WebTimeout, "Maximum duration before timing out read of the request, and closing idle connections.") @@ -205,6 +213,9 @@ func (c *Config) CompleteConfig() { if c.Web.EnableLifecycle { defaultOptions += " --web.enable-lifecycle" } + if c.AlertManagerUrl != "" { + defaultOptions += " --alertmanager.url="+c.AlertManagerUrl + } args := strings.Split(defaultOptions, " ") c.StartArgs = append(c.StartArgs, os.Args[0]) diff --git a/cmd/webcli/server/server.go b/cmd/webcli/server/server.go index fb2ace8bb..7841b643d 100644 --- a/cmd/webcli/server/server.go +++ b/cmd/webcli/server/server.go @@ -25,7 +25,7 @@ import ( "syscall" "github.com/goodrain/rainbond/cmd/webcli/option" - 
"github.com/goodrain/rainbond/discover" + discover "github.com/goodrain/rainbond/discover.v2" "github.com/goodrain/rainbond/webcli/app" "github.com/Sirupsen/logrus" diff --git a/grctl/cmd/cluster.go b/grctl/cmd/cluster.go index f0393e804..4a92b3b55 100644 --- a/grctl/cmd/cluster.go +++ b/grctl/cmd/cluster.go @@ -55,6 +55,7 @@ func getClusterInfo(c *cli.Context) error { table.AddRow("DistributedDisk", fmt.Sprintf("%dGb/%dGb", clusterInfo.ReqDisk/1024/1024/1024, clusterInfo.CapDisk/1024/1024/1024), fmt.Sprintf("%.2f", float32(clusterInfo.ReqDisk*100)/float32(clusterInfo.CapDisk))+"%") fmt.Println(table) + //show services health status list, err := clients.RegionClient.Nodes().List() handleErr(err) @@ -111,7 +112,7 @@ func getServicesHealthy(nodes []*client.HostNode) (map[string][]map[string]strin func summaryResult(list []map[string]string) (status string, errMessage string) { upNum := 0 - err := "N/A" + err := "" for _, v := range list { if v["type"] == "OutOfDisk" ||v["type"] == "DiskPressure"||v["type"] == "MemoryPressure"||v["type"] == "InstallNotReady"{ if v["status"] == "False" { @@ -130,9 +131,9 @@ func summaryResult(list []map[string]string) (status string, errMessage string) } } if upNum == len(list){ - status = "\033[0;37;42m" + strconv.Itoa(upNum) + "/" + strconv.Itoa(len(list)) + " \033[0m" + status = "\033[0;32;32m" + strconv.Itoa(upNum) + "/" + strconv.Itoa(len(list)) + " \033[0m" }else { - status = "\033[0;37;41m " + strconv.Itoa(upNum) + "/" + strconv.Itoa(len(list)) + " \033[0m" + status = "\033[0;31;31m " + strconv.Itoa(upNum) + "/" + strconv.Itoa(len(list)) + " \033[0m" } errMessage = err diff --git a/grctl/cmd/cmd.go b/grctl/cmd/cmd.go index d37330c23..60d23c0fc 100644 --- a/grctl/cmd/cmd.go +++ b/grctl/cmd/cmd.go @@ -36,6 +36,7 @@ func GetCmds() []cli.Command { cmds = append(cmds, NewCmdExec()) cmds = append(cmds, NewCmdInit()) cmds = append(cmds, NewCmdShow()) + cmds = append(cmds, NewCmdAlerting()) //task相关命令 //cmds = append(cmds, 
NewCmdTasks()) diff --git a/grctl/cmd/init.go b/grctl/cmd/init.go index 68e4fccea..c6f5c2122 100644 --- a/grctl/cmd/init.go +++ b/grctl/cmd/init.go @@ -19,23 +19,18 @@ package cmd import ( - "bufio" - "bytes" "fmt" "io/ioutil" - "net/http" "os/exec" - "strings" - "time" "github.com/Sirupsen/logrus" "github.com/urfave/cli" //"github.com/goodrain/rainbond/grctl/clients" - "github.com/goodrain/rainbond/api/util" + "os" + + "github.com/goodrain/rainbond/builder/sources" "github.com/goodrain/rainbond/grctl/clients" - "github.com/goodrain/rainbond/node/api/model" - coreutil "github.com/goodrain/rainbond/util" ) //NewCmdInit grctl init @@ -49,7 +44,13 @@ func NewCmdInit() cli.Command { }, cli.StringFlag{ Name: "type", - Usage: "node type:manage/compute, manage", + Usage: "node type: manage or compute", + Value: "manage", + }, + cli.StringFlag{ + Name: "work_dir", + Usage: "clone source code to the work directory", + Value: "/opt/rainbond/install", }, cli.StringFlag{ Name: "mip", @@ -58,10 +59,12 @@ func NewCmdInit() cli.Command { cli.StringFlag{ Name: "repo_ver", Usage: "repo version,3.4", + Value: "master", }, cli.StringFlag{ Name: "install_type", - Usage: "online/offline ,online", + Usage: "online or offline", + Value: "online", }, cli.BoolFlag{ Name: "test", @@ -113,117 +116,44 @@ func NewCmdInstallStatus() cli.Command { } func initCluster(c *cli.Context) error { - url := "http://repo.goodrain.com/release/3.5/gaops/jobs/install/prepare/init.sh" - if c.Bool("test") { - url = "http://dev.repo.goodrain.com/gaops/jobs/install/prepare/init.sh" + // check if the rainbond is already installed + fmt.Println("Checking install enviremant.") + _, err := os.Stat("/tmp/rainbond.success") + if err == nil { + fmt.Println("Rainbond is already installed, if you whant reinstall, then please delete the file: /tmp/rainbond.success") + return nil } - resp, err := http.Get(url) - if err != nil { - logrus.Errorf("error get init script,details %s", err.Error()) - return err + // download 
source code from github if in online model + if c.String("install_type") == "online" { + fmt.Println("Download rainbond install package.") + csi := sources.CodeSourceInfo{ + RepositoryURL: "https://github.com/goodrain/rainbond-install.git", + Branch: c.String("repo_ver"), + } + os.RemoveAll(c.String("work_dir")) + os.MkdirAll(c.String("work_dir"), 0755) + _, err := sources.GitClone(csi, c.String("work_dir"), nil, 5) + if err != nil { + println(err.Error()) + return err + } } - defer resp.Body.Close() - b, _ := ioutil.ReadAll(resp.Body) - args := []string{c.String("etcd"), c.String("type"), c.String("mip"), c.String("repo_ver"), c.String("install_type")} - arg := strings.Join(args, " ") - argCheck := strings.Join(args, "") - if len(argCheck) > 0 { - arg += ";" - } else { - arg = "" - } - fmt.Println("begin init cluster first node,please don't exit,wait install") - cmd := exec.Command("bash", "-c", arg+string(b)) - var buffe bytes.Buffer - cmd.Stderr = &buffe - stdout, _ := cmd.StdoutPipe() - go func() { - read := bufio.NewReader(stdout) - for { - line, _, err := read.ReadLine() - if err != nil { - return - } - fmt.Println(string(line)) - } - }() - if err := cmd.Run(); err != nil { - logrus.Errorf("current node init error,%s", err.Error()) - return err - } - //检测并设置init的结果 - result := buffe.String() - index := strings.Index(result, "{") - jsonOutPut := result - if index > -1 { - jsonOutPut = result[index:] - } - fmt.Println("Result:" + jsonOutPut) - output, err := model.ParseTaskOutPut(jsonOutPut) + // start setup script to install rainbond + fmt.Println("Begin init cluster first node,please don't exit,wait install") + cmd := exec.Command("bash", "-c", fmt.Sprintf("cd %s ; ./setup.sh %s %s", c.String("work_dir"), c.String("install_type"), c.String("repo_ver"))) + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + + err = cmd.Run() if err != nil { - logrus.Errorf("get init current node result error:%s", err.Error()) - return err - } - var newConfigs []model.ConfigUnit 
- if output.Global != nil { - for k, v := range output.Global { - if strings.Index(v, ",") > -1 { - values := strings.Split(v, ",") - coreutil.Deweight(&values) - newConfigs = append(newConfigs, model.ConfigUnit{ - Name: strings.ToUpper(k), - Value: values, - ValueType: "array", - IsConfigurable: false, - }) - } else { - newConfigs = append(newConfigs, model.ConfigUnit{ - Name: strings.ToUpper(k), - Value: v, - ValueType: "string", - IsConfigurable: false, - }) - } - } - } - var gc *model.GlobalConfig - var error *util.APIHandleError - for i := 0; i < 10; i++ { - time.Sleep(time.Second * 2) - gc, error = clients.RegionClient.Configs().Get() - if err == nil && gc != nil { - for _, nc := range newConfigs { - gc.Add(nc) - } - error = clients.RegionClient.Configs().Put(gc) - break - } - } - if error != nil { - logrus.Errorf("Update Datacenter configs error,please check node status") - return err - } - //获取当前节点ID - hostID, err := coreutil.ReadHostID("") - if err != nil { - logrus.Errorf("read nodeid error,please check node status") + println(err.Error()) return err } - //error = clients.NodeClient.Tasks().Exec("check_manage_base_services", []string{hostID}) - //if error != nil { - // logrus.Errorf("error exec task:%s,details %s", "check_manage_base_services", error.String()) - // return error.Err - //} - error = clients.RegionClient.Tasks().Exec("check_manage_services", []string{hostID}) - if error != nil { - logrus.Errorf("error exec task:%s,details %s", "check_manage_services", error.String()) - return error.Err - } - //Status("check_manage_base_services", []string{hostID}) - Status("check_manage_services", []string{hostID}) + ioutil.WriteFile("/tmp/rainbond.success", []byte(c.String("repo_ver")), 0644) + fmt.Println("install manage node success,next you can :") fmt.Println(" add compute node--grctl node add -h") fmt.Println(" install compute node--grctl install compute -h") diff --git a/grctl/cmd/monitor.go b/grctl/cmd/monitor.go new file mode 100644 index 
000000000..5ecd77929 --- /dev/null +++ b/grctl/cmd/monitor.go @@ -0,0 +1,111 @@ +package cmd + +import ( + "github.com/urfave/cli" + "github.com/Sirupsen/logrus" + "github.com/goodrain/rainbond/grctl/clients" + "fmt" + "github.com/ghodss/yaml" + "errors" +) + +//NewCmdAlerting grctl alerting command +func NewCmdAlerting() cli.Command { + c := cli.Command{ + Name: "alerting", + Usage: "监控报警。grctl alerting", + Subcommands: []cli.Command{ + { + Name: "get", + Usage: "get rule_name", + Action: func(c *cli.Context) error { + Common(c) + name := c.Args().First() + if name == "" { + logrus.Errorf("need args") + return nil + } + v, err := clients.RegionClient.Monitor().GetRule(name) + handleErr(err) + rule, _ := yaml.Marshal(v) + fmt.Println(string(rule)) + return nil + }, + }, + { + Name: "list", + Usage: "list", + Action: func(c *cli.Context) error { + Common(c) + list, err := clients.RegionClient.Monitor().GetAllRule() + handleErr(err) + ruleList, _ := yaml.Marshal(list) + fmt.Println(string(ruleList)) + return nil + }, + }, + { + Name: "del", + Usage: "del rule_name", + Action: func(c *cli.Context) error { + Common(c) + name := c.Args().First() + if name == "" { + logrus.Errorf("need args") + return nil + } + _, err := clients.RegionClient.Monitor().DelRule(name) + handleErr(err) + fmt.Println("Delete rule succeeded") + return nil + }, + }, + { + Name: "add", + Usage: "add FilePath", + Action: func(c *cli.Context) error { + Common(c) + filePath := c.Args().First() + if filePath == "" { + logrus.Errorf("need args") + return nil + } + _, err := clients.RegionClient.Monitor().AddRule(filePath) + handleErr(err) + fmt.Println("Add rule successfully") + return nil + + }, + }, + { + Name: "modify", + Usage: "modify 修改规则", + Flags: []cli.Flag{ + cli.StringFlag{ + Name: "RulesName,rn", + Value: "", + Usage: "RulesName", + }, + cli.StringFlag{ + Name: "RulesPath,rp", + Value: "", + Usage: "RulesPath", + }, + }, + Action: func(c *cli.Context) error { + Common(c) + if c.IsSet("RulesName") &&
c.IsSet("RulesPath") { + path := c.String("RulesPath") + ruleName := c.String("RulesName") + _, err := clients.RegionClient.Monitor().RegRule(ruleName, path) + handleErr(err) + fmt.Println("Modify rule successfully") + return nil + } + return errors.New("rule name or rules not null") + }, + }, + }, + } + return c +} diff --git a/grctl/cmd/node.go b/grctl/cmd/node.go index 91bdb0789..25c36f73b 100644 --- a/grctl/cmd/node.go +++ b/grctl/cmd/node.go @@ -102,9 +102,9 @@ func fileExist(path string) bool { func handleStatus(serviceTable *termtables.Table, ready bool, v *client.HostNode) { var formatReady string if ready == false{ - formatReady = "\033[0;37;41m false \033[0m" + formatReady = "\033[0;31;31m false \033[0m" }else { - formatReady = "\033[0;37;42m true \033[0m" + formatReady = "\033[0;32;32m true \033[0m" } if v.Role.HasRule("compute") && !v.Role.HasRule("manage") { serviceTable.AddRow(v.ID, v.InternalIP, v.HostName, v.Role.String(), v.Mode, v.Status, v.Alived, !v.Unschedulable, formatReady) @@ -125,12 +125,12 @@ func handleResult(serviceTable *termtables.Table, v *client.HostNode) { var formatReady string if v.Status == client.ConditionFalse{ if v.Type == client.OutOfDisk || v.Type == client.MemoryPressure || v.Type==client.DiskPressure ||v.Type==client.InstallNotReady{ - formatReady = "\033[0;37;42m false \033[0m" + formatReady = "\033[0;32;32m false \033[0m" }else { - formatReady = "\033[0;37;41m false \033[0m" + formatReady = "\033[0;31;31m false \033[0m" } }else { - formatReady = "\033[0;37;42m true \033[0m" + formatReady = "\033[0;32;32m true \033[0m" } serviceTable.AddRow(string(v.Type), formatReady, handleMessage(string(v.Status), v.Message)) } @@ -141,11 +141,11 @@ func extractReady(serviceTable *termtables.Table, v *client.HostNode, name strin if string(v.Type) == name{ var formatReady string if v.Status == client.ConditionFalse{ - formatReady = "\033[0;37;41m false \033[0m" + formatReady = "\033[0;31;31m false \033[0m" }else { - formatReady = 
"\033[0;37;42m true \033[0m" + formatReady = "\033[0;32;32m true \033[0m" } - serviceTable.AddRow(string(v.Type), formatReady, handleMessage(string(v.Status), v.Message)) + serviceTable.AddRow("\033[0;33;33m "+string(v.Type)+" \033[0m", formatReady, handleMessage(string(v.Status), v.Message)) } } } diff --git a/monitor/api/controller/rules.go b/monitor/api/controller/rules.go index 5b47c13c7..7f5bb229f 100644 --- a/monitor/api/controller/rules.go +++ b/monitor/api/controller/rules.go @@ -6,9 +6,9 @@ import ( httputil "github.com/goodrain/rainbond/util/http" "github.com/Sirupsen/logrus" - "gopkg.in/yaml.v2" "github.com/goodrain/rainbond/monitor/prometheus" "github.com/go-chi/chi" + "encoding/json" ) type ControllerManager struct { @@ -25,42 +25,34 @@ func NewControllerManager(a *prometheus.AlertingRulesManager, p *prometheus.Mana } func (c *ControllerManager) AddRules(w http.ResponseWriter, r *http.Request) { + logrus.Info("add rules") in, err := ioutil.ReadAll(r.Body) if err != nil { httputil.ReturnError(r, w, 400, err.Error()) return } - + println(string(in)) var RulesConfig prometheus.AlertingNameConfig - err = ioutil.WriteFile("/etc/prometheus/cache_rule.yml", in, 0644) - if err != nil { - logrus.Error(err.Error()) - } - - content, err := ioutil.ReadFile("/etc/prometheus/cache_rule.yml") - if err != nil { - logrus.Error( err) - - } - - if err := yaml.Unmarshal(content, &RulesConfig); err != nil { - logrus.Error("Unmarshal prometheus alerting rules config string to object error.", err.Error()) + unmarshalErr := json.Unmarshal(in, &RulesConfig) + if unmarshalErr != nil { + logrus.Info(unmarshalErr) httputil.ReturnError(r, w, 400, err.Error()) return } - c.Rules.RulesConfig.LoadAlertingRulesConfig() + + c.Rules.LoadAlertingRulesConfig() group := c.Rules.RulesConfig.Groups - for _,v := range group{ - if v.Name == RulesConfig.Name{ + for _, v := range group { + if v.Name == RulesConfig.Name { httputil.ReturnError(r, w, 400, "Rule already exists") return } } - group = 
append(group, &RulesConfig) - c.Rules.RulesConfig.SaveAlertingRulesConfig() + c.Rules.RulesConfig.Groups = group + c.Rules.SaveAlertingRulesConfig() c.Manager.RestartDaemon() httputil.ReturnSuccess(r, w, "Add rule successfully") @@ -69,28 +61,29 @@ func (c *ControllerManager) AddRules(w http.ResponseWriter, r *http.Request) { func (c *ControllerManager) GetRules(w http.ResponseWriter, r *http.Request) { logrus.Infof("get rule") rulesName := chi.URLParam(r, "rules_name") - c.Rules.RulesConfig.LoadAlertingRulesConfig() + c.Rules.LoadAlertingRulesConfig() for _, v := range c.Rules.RulesConfig.Groups { if v.Name == rulesName { - res := v.Rules - httputil.ReturnSuccess(r, w, res) + httputil.ReturnSuccess(r, w, v) return } } - httputil.ReturnError(r, w, 400, "Rule does not exist") + httputil.ReturnError(r, w, 404, "Rule does not exist") } func (c *ControllerManager) DelRules(w http.ResponseWriter, r *http.Request) { logrus.Infof("delete rule") rulesName := chi.URLParam(r, "rules_name") - c.Rules.RulesConfig.LoadAlertingRulesConfig() + c.Rules.LoadAlertingRulesConfig() groupsList := c.Rules.RulesConfig.Groups for i, v := range groupsList { if v.Name == rulesName { - groupsList = append(groupsList[:i],groupsList[i+1:]...) - c.Rules.RulesConfig.SaveAlertingRulesConfig() + groupsList = append(groupsList[:i], groupsList[i+1:]...) 
+ c.Rules.RulesConfig.Groups = groupsList + c.Rules.SaveAlertingRulesConfig() + c.Manager.RestartDaemon() httputil.ReturnSuccess(r, w, "successfully deleted") return } @@ -98,42 +91,42 @@ func (c *ControllerManager) DelRules(w http.ResponseWriter, r *http.Request) { httputil.ReturnSuccess(r, w, "") } - func (c *ControllerManager) RegRules(w http.ResponseWriter, r *http.Request) { + rulesName := chi.URLParam(r, "rules_name") in, err := ioutil.ReadAll(r.Body) if err != nil { httputil.ReturnError(r, w, 400, err.Error()) return } + println(string(in)) var RulesConfig prometheus.AlertingNameConfig - err = ioutil.WriteFile("/etc/prometheus/cache_rule.yml", in, 0644) - if err != nil { - logrus.Error(err.Error()) - } - - content, err := ioutil.ReadFile("/etc/prometheus/cache_rule.yml") - if err != nil { - logrus.Error( err) - - } - - if err := yaml.Unmarshal(content, &RulesConfig); err != nil { - logrus.Error("Unmarshal prometheus alerting rules config string to object error.", err.Error()) + unmarshalErr := json.Unmarshal(in, &RulesConfig) + if unmarshalErr != nil { + logrus.Info(unmarshalErr) httputil.ReturnError(r, w, 400, err.Error()) return } - c.Rules.RulesConfig.LoadAlertingRulesConfig() + + c.Rules.LoadAlertingRulesConfig() group := c.Rules.RulesConfig.Groups - for i,v := range group{ - if v.Name == RulesConfig.Name{ + for i, v := range group { + if v.Name == rulesName { group[i] = &RulesConfig + c.Manager.RestartDaemon() httputil.ReturnSuccess(r, w, "Update rule succeeded") - c.Rules.RulesConfig.SaveAlertingRulesConfig() + c.Rules.SaveAlertingRulesConfig() return } } - httputil.ReturnError(r, w, 400,"The rule to be updated does not exist") + httputil.ReturnError(r, w, 404, "The rule to be updated does not exist") +} + +func (c *ControllerManager) GetAllRules(w http.ResponseWriter, r *http.Request) { + logrus.Infof("get all rule") + c.Rules.LoadAlertingRulesConfig() + val := c.Rules.RulesConfig + httputil.ReturnSuccess(r, w, val) } diff --git 
a/monitor/api/router.go b/monitor/api/router.go index 4ab8b6e85..af731b680 100644 --- a/monitor/api/router.go +++ b/monitor/api/router.go @@ -32,16 +32,16 @@ func APIServer(c *controller.ControllerManager) *chi.Mux { r := chi.NewRouter() r.Route("/monitor", func(r chi.Router) { r.Get("/health", func(w http.ResponseWriter, r *http.Request) { - bean := map[string]string{"status":"health","info":"monitor service health"} + bean := map[string]string{"status": "health", "info": "monitor service health"} httputil.ReturnSuccess(r, w, bean) }) }) - r.Route("/rules", func(r chi.Router) { - r.Post("/", c.AddRules) - r.Put("/", c.RegRules) - r.Delete("/{rules_name}", c.DelRules) - r.Get("/{rules_name}", c.GetRules) - + r.Route("/v2/rules", func(r chi.Router) { + r.Post("/", c.AddRules) + r.Put("/{rules_name}", c.RegRules) + r.Delete("/{rules_name}", c.DelRules) + r.Get("/{rules_name}", c.GetRules) + r.Get("/all", c.GetAllRules) }) util.ProfilerSetup(r) return r diff --git a/monitor/prometheus/manager.go b/monitor/prometheus/manager.go index 929df5cda..5c39bf451 100644 --- a/monitor/prometheus/manager.go +++ b/monitor/prometheus/manager.go @@ -74,7 +74,7 @@ func NewManager(config *option.Config, a *AlertingRulesManager) *Manager { ScrapeInterval: model.Duration(time.Second * 5), EvaluationInterval: model.Duration(time.Second * 30), }, - RuleFiles: []string{"/etc/prometheus/rules.yml"}, + RuleFiles: []string{config.AlertingRulesFile}, }, Registry: reg, httpClient: client, @@ -82,7 +82,7 @@ func NewManager(config *option.Config, a *AlertingRulesManager) *Manager { a: a, } m.LoadConfig() - m.a.RulesConfig.InitRulesConfig() + m.a.InitRulesConfig() return m } diff --git a/monitor/prometheus/rules_manager.go b/monitor/prometheus/rules_manager.go index 99e192a4f..b2c2ecafd 100644 --- a/monitor/prometheus/rules_manager.go +++ b/monitor/prometheus/rules_manager.go @@ -5,75 +5,171 @@ import ( "io/ioutil" "gopkg.in/yaml.v2" "os" + "github.com/goodrain/rainbond/cmd/monitor/option" + ) 
type AlertingRulesConfig struct { - Groups []*AlertingNameConfig `yaml:"groups"` + Groups []*AlertingNameConfig `yaml:"groups" json:"groups"` } type AlertingNameConfig struct { - Name string `yaml:"name"` - Rules []*RulesConfig `yaml:"rules"` + Name string `yaml:"name" json:"name"` + Rules []*RulesConfig `yaml:"rules" json:"rules"` } type RulesConfig struct { - Alert string `yaml:"alert"` - Expr string `yaml:"expr"` - For string `yaml:"for"` - Labels map[string]string `yaml:"labels"` - Annotations map[string]string `yaml:"annotations"` + Alert string `yaml:"alert" json:"alert"` + Expr string `yaml:"expr" json:"expr"` + For string `yaml:"for" json:"for"` + Labels map[string]string `yaml:"labels" json:"labels"` + Annotations map[string]string `yaml:"annotations" json:"annotations"` } type AlertingRulesManager struct { RulesConfig *AlertingRulesConfig - + config *option.Config } -func NewRulesManager() *AlertingRulesManager { - a:= &AlertingRulesManager{ +func NewRulesManager(config *option.Config) *AlertingRulesManager { + a := &AlertingRulesManager{ RulesConfig: &AlertingRulesConfig{ - Groups:[]*AlertingNameConfig{ + Groups: []*AlertingNameConfig{ &AlertingNameConfig{ - Name: "test", + Name: "InstanceHealth", Rules: []*RulesConfig{ &RulesConfig{ - Alert: "MqHealth", - Expr: "acp_mq_exporter_health_status{job='mq'} < 1", - For: "2m", - Labels: map[string]string{"service_name": "mq"}, - Annotations: map[string]string{"summary": "unhealthy"}, + Alert: "InstanceDown", + Expr: "up == 0", + For: "3m", + Labels: map[string]string{}, + Annotations: map[string]string{"summary": "builder {{$labels.instance}} down", "description":"{{$labels.instance}} of job {{$labels.job}} has been down for more than 3 minutes"}, }, }, }, &AlertingNameConfig{ - Name: "test2", + Name: "BuilderHealth", Rules: []*RulesConfig{ &RulesConfig{ - Alert: "builderHealth", - Expr: "acp_mq_exporter_health_status{job='mq'} < 1", - For: "5m", - Labels: map[string]string{"service_name": "builder"}, - 
Annotations: map[string]string{"summary": "unhealthy"}, + Alert: "BuilderUnhealthy", + Expr: "builder_exporter_health_status == 0", + For: "3m", + Labels: map[string]string{}, + Annotations: map[string]string{"summary": "builder unhealthy"}, + }, + &RulesConfig{ + Alert: "BuilderTaskError", + Expr: "builder_exporter_builder_task_error > 30", + For: "3m", + Labels: map[string]string{}, + Annotations: map[string]string{"summary": "Builder execution task error number is greater than 30"}, + }, + }, + }, + &AlertingNameConfig{ + + Name: "WorkerHealth", + Rules: []*RulesConfig{ + &RulesConfig{ + Alert: "WorkerUnhealthy", + Expr: "app_resource_exporter_health_status == 0", + For: "3m", + Labels: map[string]string{}, + Annotations: map[string]string{"summary": "worker unhealthy"}, + }, + &RulesConfig{ + Alert: "WorkerTaskError", + Expr: "app_resource_exporter_worker_task_error > 50", + For: "3m", + Labels: map[string]string{}, + Annotations: map[string]string{"summary": "worker execution task error number is greater than 50"}, + }, + }, + }, + &AlertingNameConfig{ + + Name: "EntranceHealth", + Rules: []*RulesConfig{ + &RulesConfig{ + Alert: "EntranceUnHealthy", + Expr: "acp_entrance_exporter_health_status == 0", + For: "3m", + Labels: map[string]string{}, + Annotations: map[string]string{"summary": "entrance unhealthy"}, + }, + }, + }, + &AlertingNameConfig{ + + Name: "MqHealth", + Rules: []*RulesConfig{ + &RulesConfig{ + Alert: "MqUnhealthy", + Expr: "acp_mq_exporter_health_status == 0", + For: "3m", + Labels: map[string]string{}, + Annotations: map[string]string{"summary": "mq unhealthy"}, + }, + &RulesConfig{ + Alert: "TeamTaskMany", + Expr: "acp_mq_dequeue_number-acp_mq_enqueue_number > 200", + For: "3m", + Labels: map[string]string{}, + Annotations: map[string]string{"summary": "The number of tasks in the queue is greater than 200"}, + }, + }, + }, + &AlertingNameConfig{ + + Name: "EventlogHealth", + Rules: []*RulesConfig{ + &RulesConfig{ + Alert: 
"EventLogUnhealthy", + Expr: "event_log_exporter_health_status == 0", + For: "3m", + Labels: map[string]string{}, + Annotations: map[string]string{"summary": "eventlog unhealthy"}, + }, + &RulesConfig{ + Alert: "EventLogDown", + Expr: "event_log_exporter_instanse_up == 0", + For: "3m", + Labels: map[string]string{}, + Annotations: map[string]string{"summary": "eventlog service down"}, + }, + }, + }, + &AlertingNameConfig{ + + Name: "WebcliHealth", + Rules: []*RulesConfig{ + &RulesConfig{ + Alert: "WebcliUnhealthy", + Expr: "webcli_exporter_health_status == 0", + For: "3m", + Labels: map[string]string{}, + Annotations: map[string]string{"summary": "webcli unhealthy"}, }, }, }, }, }, + config: config, } return a } -func (a *AlertingRulesConfig)LoadAlertingRulesConfig() error { +func (a *AlertingRulesManager) LoadAlertingRulesConfig() error { logrus.Info("Load AlertingRules config file.") - content, err := ioutil.ReadFile("/etc/prometheus/rules.yml") + content, err := ioutil.ReadFile(a.config.AlertingRulesFile) if err != nil { logrus.Error("Failed to read AlertingRules config file: ", err) logrus.Info("Init config file by default values.") return nil } - if err := yaml.Unmarshal(content, a); err != nil { + if err := yaml.Unmarshal(content, a.RulesConfig); err != nil { logrus.Error("Unmarshal AlertingRulesConfig config string to object error.", err.Error()) return err } @@ -82,17 +178,16 @@ func (a *AlertingRulesConfig)LoadAlertingRulesConfig() error { return nil } - -func (a *AlertingRulesConfig)SaveAlertingRulesConfig() error { +func (a *AlertingRulesManager) SaveAlertingRulesConfig() error { logrus.Debug("Save alerting rules config file.") - data, err := yaml.Marshal(a) + data, err := yaml.Marshal(a.RulesConfig) if err != nil { logrus.Error("Marshal alerting rules config to yaml error.", err.Error()) return err } - err = ioutil.WriteFile("/etc/prometheus/rules.yml", data, 0644) + err = ioutil.WriteFile(a.config.AlertingRulesFile, data, 0644) if err != nil { 
logrus.Error("Write alerting rules config file error.", err.Error()) return err @@ -101,15 +196,14 @@ func (a *AlertingRulesConfig)SaveAlertingRulesConfig() error { return nil } - -func (a *AlertingRulesConfig) AddRules(val AlertingNameConfig) error { - group := a.Groups +func (a *AlertingRulesManager) AddRules(val AlertingNameConfig) error { + group := a.RulesConfig.Groups group = append(group, &val) return nil } -func (a *AlertingRulesConfig) InitRulesConfig() { - _, err := os.Stat("/etc/prometheus/rules.yml") //os.Stat获取文件信息 +func (a *AlertingRulesManager) InitRulesConfig() { + _, err := os.Stat(a.config.AlertingRulesFile) //os.Stat获取文件信息 if err != nil { if os.IsExist(err) { return @@ -119,4 +213,4 @@ func (a *AlertingRulesConfig) InitRulesConfig() { } return -} \ No newline at end of file +} diff --git a/node/api/model/model.go b/node/api/model/model.go index c4c221d35..b67b2fbe1 100644 --- a/node/api/model/model.go +++ b/node/api/model/model.go @@ -439,3 +439,20 @@ type NodeDetails struct { AllocatedResources map[string]string `json:"allocatedresources"` Events map[string][]string `json:"events"` } + +type AlertingRulesConfig struct { + Groups []*AlertingNameConfig `yaml:"groups" json:"groups"` +} + +type AlertingNameConfig struct { + Name string `yaml:"name" json:"name"` + Rules []*RulesConfig `yaml:"rules" json:"rules"` +} + +type RulesConfig struct { + Alert string `yaml:"alert" json:"alert"` + Expr string `yaml:"expr" json:"expr"` + For string `yaml:"for" json:"for"` + Labels map[string]string `yaml:"labels" json:"labels"` + Annotations map[string]string `yaml:"annotations" json:"annotations"` +}