Merge branch 'V3.7' of https://github.com/goodrain/rainbond into V3.7

This commit is contained in:
barnettZQG 2018-08-02 12:17:07 +08:00
commit e8b1c44007
26 changed files with 624 additions and 256 deletions

View File

@ -36,7 +36,7 @@ type DockerConsole struct {
}
var defaultDockerConsoleEndpoints = []string{"127.0.0.1:7171"}
var defaultEventLogEndpoints = []string{"127.0.0.1:6363"}
var defaultEventLogEndpoints = []string{"local=>127.0.0.1:6363"}
var defaultEtcdEndpoints = []string{"127.0.0.1:2379"}
var dockerConsole *DockerConsole

View File

@ -104,7 +104,7 @@ func (e *defalt) UpdateEndpoints(endpoints ...*corediscoverconfig.Endpoint) {
var endStr []string
for _, end := range endpoints {
if end.URL != "" {
endStr = append(endStr, end.URL)
endStr = append(endStr, end.Name + "=>" + end.URL)
}
}
logrus.Debugf("endstr is %v, name is %v", endStr, e.name)

View File

@ -27,6 +27,7 @@ import (
var nodeProxy proxy.Proxy
var builderProxy proxy.Proxy
var prometheusProxy proxy.Proxy
var monitorProxy proxy.Proxy
//InitProxy 初始化
func InitProxy(conf option.Config) {
@ -42,6 +43,10 @@ func InitProxy(conf option.Config) {
prometheusProxy = proxy.CreateProxy("prometheus", "http", []string{"127.0.0.1:9999"})
discover.GetEndpointDiscover(conf.EtcdEndpoint).AddProject("prometheus", prometheusProxy)
}
if monitorProxy == nil {
monitorProxy = proxy.CreateProxy("monitor", "http", []string{"127.0.0.1:3329"})
discover.GetEndpointDiscover(conf.EtcdEndpoint).AddProject("monitor", monitorProxy)
}
}
@ -59,3 +64,8 @@ func GetBuilderProxy() proxy.Proxy {
func GetPrometheusProxy() proxy.Proxy {
return prometheusProxy
}
// GetMonitorProxy returns the package-level proxy used to forward requests to
// the monitor service. The value is whatever InitProxy assigned; it is nil if
// InitProxy has not run yet.
func GetMonitorProxy() proxy.Proxy {
return monitorProxy
}

View File

@ -162,6 +162,10 @@ func Proxy(next http.Handler) http.Handler {
handler.GetNodeProxy().Proxy(w, r)
return
}
if strings.HasPrefix(r.RequestURI, "/v2/rules") {
handler.GetMonitorProxy().Proxy(w, r)
return
}
next.ServeHTTP(w, r)
}
return http.HandlerFunc(fn)

View File

@ -45,7 +45,15 @@ func (h *HTTPProxy) Proxy(w http.ResponseWriter, r *http.Request) {
//UpdateEndpoints 更新端点
func (h *HTTPProxy) UpdateEndpoints(endpoints ...string) {
h.endpoints = CreateEndpoints(endpoints)
ends := []string{}
for _, end := range endpoints {
if kv := strings.Split(end, "=>"); len(kv) > 1 {
ends = append(ends, kv[1])
} else {
ends = append(ends, end)
}
}
h.endpoints = CreateEndpoints(ends)
}
//Do do proxy
@ -60,5 +68,13 @@ func (h *HTTPProxy) Do(r *http.Request) (*http.Response, error) {
}
func createHTTPProxy(name string, endpoints []string) *HTTPProxy {
return &HTTPProxy{name, CreateEndpoints(endpoints), NewRoundRobin()}
ends := []string{}
for _, end := range endpoints {
if kv := strings.Split(end, "=>"); len(kv) > 1 {
ends = append(ends, kv[1])
} else {
ends = append(ends, end)
}
}
return &HTTPProxy{name, CreateEndpoints(ends), NewRoundRobin()}
}

View File

@ -19,12 +19,9 @@
package proxy
import (
"io/ioutil"
"net/http"
"strings"
"sync/atomic"
"github.com/Sirupsen/logrus"
)
// RoundRobin round robin loadBalance impl
@ -44,6 +41,20 @@ func (e Endpoint) String() string {
return string(e)
}
// GetName returns the name part of an endpoint written as "name=>addr".
// When the endpoint carries no "=>" separator the whole string is returned.
func (e Endpoint) GetName() string {
	raw := string(e)
	sep := strings.Index(raw, "=>")
	if sep < 0 {
		return raw
	}
	return raw[:sep]
}
// GetAddr returns the address part of an endpoint written as "name=>addr".
// Only the segment between the first and the second "=>" is returned; when
// the endpoint carries no "=>" separator the whole string is returned.
func (e Endpoint) GetAddr() string {
	raw := string(e)
	sep := strings.Index(raw, "=>")
	if sep < 0 {
		return raw
	}
	addr := raw[sep+len("=>"):]
	// Anything after a further separator is dropped.
	if next := strings.Index(addr, "=>"); next >= 0 {
		addr = addr[:next]
	}
	return addr
}
//EndpointList EndpointList
type EndpointList []Endpoint
@ -122,39 +133,34 @@ type SelectBalance struct {
//NewSelectBalance 创建选择性负载均衡
func NewSelectBalance() *SelectBalance {
body, err := ioutil.ReadFile("/etc/goodrain/host_id_list.conf")
if err != nil {
logrus.Error("read host id list error,", err.Error())
}
sb := &SelectBalance{
return &SelectBalance{
hostIDMap: map[string]string{"local": "127.0.0.1:6363"},
}
if body != nil && len(body) > 0 {
listStr := string(body)
hosts := strings.Split(strings.TrimSpace(listStr), ";")
for _, h := range hosts {
info := strings.Split(strings.Trim(h, "\r\n"), "=")
if len(info) == 2 {
sb.hostIDMap[info[0]] = info[1]
}
}
}
logrus.Info("Docker log websocket server endpoints:", sb.hostIDMap)
return sb
}
//Select 负载
func (s *SelectBalance) Select(r *http.Request, endpoints EndpointList) Endpoint {
if r.URL != nil {
hostID := r.URL.Query().Get("host_id")
if e, ok := s.hostIDMap[hostID]; ok {
if endpoints.HaveEndpoint(e) {
return Endpoint(e)
}
if r.URL == nil {
return Endpoint(s.hostIDMap["local"])
}
id2ip := map[string]string{"local": "127.0.0.1:6363"}
for _, end := range endpoints {
if kv := strings.Split(string(end), "=>"); len(kv) > 1 {
id2ip[kv[0]] = kv[1]
}
}
if r.URL != nil {
hostID := r.URL.Query().Get("host_id")
if e, ok := id2ip[hostID]; ok {
return Endpoint(e)
}
}
if len(endpoints) > 0 {
return endpoints[0]
}
return Endpoint(s.hostIDMap["local"])
}

View File

@ -24,7 +24,7 @@ import "net/http"
type Proxy interface {
Proxy(w http.ResponseWriter, r *http.Request)
Do(r *http.Request) (*http.Response, error)
UpdateEndpoints(endpoints ...string)
UpdateEndpoints(endpoints ...string) // format: ["name=>ip:port", ...]
}
//CreateProxy 创建代理

View File

@ -127,11 +127,12 @@ type WebSocketProxy struct {
func (h *WebSocketProxy) Proxy(w http.ResponseWriter, req *http.Request) {
endpoint := h.lb.Select(req, h.endpoints)
logrus.Info("Proxy webSocket to: ", endpoint)
path := req.RequestURI
if strings.Contains(path, "?") {
path = path[:strings.Index(path, "?")]
}
u := url.URL{Scheme: "ws", Host: endpoint.String(), Path: path}
u := url.URL{Scheme: "ws", Host: endpoint.GetAddr(), Path: path}
logrus.Infof("connecting to %s", u.String())
// Pass headers from the incoming request to the dialer to forward them to
// the final destinations.

162
api/region/monitor.go Normal file
View File

@ -0,0 +1,162 @@
// RAINBOND, Application Management Platform
// Copyright (C) 2014-2017 Goodrain Co., Ltd.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version. For any non-GPL usage of Rainbond,
// one or multiple Commercial Licenses authorized by Goodrain Co., Ltd.
// must be obtained first.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
package region
import (
"github.com/goodrain/rainbond/api/util"
"github.com/goodrain/rainbond/node/api/model"
utilhttp "github.com/goodrain/rainbond/util/http"
"fmt"
"encoding/json"
"bytes"
"os"
"errors"
"io/ioutil"
"github.com/Sirupsen/logrus"
"gopkg.in/yaml.v2"
)
// MonitorInterface is the client API for managing alerting rules through the
// region API; the implementation in this file sends its requests under the
// "/v2/rules" path prefix.
type MonitorInterface interface {
// GetRule fetches the alerting rule group with the given name.
GetRule(name string) (*model.AlertingNameConfig, *util.APIHandleError)
// GetAllRule fetches every alerting rule group.
GetAllRule() (*model.AlertingRulesConfig, *util.APIHandleError)
// DelRule deletes the alerting rule group with the given name.
DelRule(name string) (*utilhttp.ResponseBody, *util.APIHandleError)
// AddRule reads a YAML rule group from the file at path and creates it.
AddRule(path string) (*utilhttp.ResponseBody, *util.APIHandleError)
// RegRule reads a YAML rule group from the file at path and uses it to
// replace the rule group called ruleName.
RegRule(ruleName string, path string) (*utilhttp.ResponseBody, *util.APIHandleError)
}
// Monitor returns the alerting-rule client. It works on a copy of the
// receiver and addresses every request below "/v2/rules".
func (r *regionImpl) Monitor() MonitorInterface {
	client := monitor{
		regionImpl: *r,
		prefix:     "/v2/rules",
	}
	return &client
}
// monitor implements MonitorInterface on top of an embedded regionImpl,
// whose DoRequest method performs the HTTP calls.
type monitor struct {
regionImpl
// prefix is the URL path prefix prepended to every request path.
prefix string
}
// GetRule issues GET <prefix>/<name> and decodes the response bean into an
// AlertingNameConfig. A transport error or any status other than 200 is
// reported as an APIHandleError.
func (m *monitor) GetRule(name string) (*model.AlertingNameConfig, *util.APIHandleError) {
	rule := new(model.AlertingNameConfig)
	resp := utilhttp.ResponseBody{Bean: rule}

	status, reqErr := m.DoRequest(m.prefix+"/"+name, "GET", nil, &resp)
	switch {
	case reqErr != nil:
		return nil, handleErrAndCode(reqErr, status)
	case status != 200:
		logrus.Error("Return failure message ", resp.Bean)
		return nil, util.CreateAPIHandleError(status, fmt.Errorf("get alerting rules error code %d", status))
	}
	return rule, nil
}
// GetAllRule issues GET <prefix>/all and decodes the response bean into an
// AlertingRulesConfig. A transport error or any status other than 200 is
// reported as an APIHandleError.
func (m *monitor) GetAllRule() (*model.AlertingRulesConfig, *util.APIHandleError) {
	rules := new(model.AlertingRulesConfig)
	resp := utilhttp.ResponseBody{Bean: rules}

	status, reqErr := m.DoRequest(m.prefix+"/all", "GET", nil, &resp)
	switch {
	case reqErr != nil:
		return nil, handleErrAndCode(reqErr, status)
	case status != 200:
		logrus.Error("Return failure message ", resp.Bean)
		return nil, util.CreateAPIHandleError(status, fmt.Errorf("get alerting rules error code %d", status))
	}
	return rules, nil
}
// DelRule issues DELETE <prefix>/<name> and returns the decoded response
// body. A transport error or any status other than 200 is reported as an
// APIHandleError.
func (m *monitor) DelRule(name string) (*utilhttp.ResponseBody, *util.APIHandleError) {
	resp := new(utilhttp.ResponseBody)

	status, reqErr := m.DoRequest(m.prefix+"/"+name, "DELETE", nil, resp)
	if reqErr != nil {
		return nil, handleErrAndCode(reqErr, status)
	}
	if status == 200 {
		return resp, nil
	}
	logrus.Error("Return failure message ", resp.Bean)
	return nil, util.CreateAPIHandleError(status, fmt.Errorf("del alerting rules error code %d", status))
}
func (m *monitor) AddRule(path string) (*utilhttp.ResponseBody, *util.APIHandleError) {
_, err := os.Stat(path)
if err!= nil || !os.IsExist(err){
return nil, util.CreateAPIHandleError(400, errors.New("file does not exist"))
}
content, err := ioutil.ReadFile(path)
if err != nil {
logrus.Error("Failed to read AlertingRules config file: ", err.Error())
return nil, util.CreateAPIHandleError(400, err)
}
var rulesConfig model.AlertingNameConfig
if err := yaml.Unmarshal(content, &rulesConfig); err != nil {
logrus.Error("Unmarshal AlertingRulesConfig config string to object error.", err.Error())
return nil, util.CreateAPIHandleError(400, err)
}
var decode utilhttp.ResponseBody
body, err := json.Marshal(rulesConfig)
if err != nil {
return nil, util.CreateAPIHandleError(400, err)
}
code, err := m.DoRequest(m.prefix, "POST", bytes.NewBuffer(body), &decode)
if err != nil {
println("====err>",code,err)
return nil, handleErrAndCode(err, code)
}
if code != 200 {
logrus.Error("Return failure message ", decode.Bean)
return nil, util.CreateAPIHandleError(code, fmt.Errorf("add alerting rules error code %d", code))
}
return &decode, nil
}
func (m *monitor) RegRule(ruleName string, path string) (*utilhttp.ResponseBody, *util.APIHandleError) {
_, err := os.Stat(path)
if err!= nil || !os.IsExist(err){
return nil, util.CreateAPIHandleError(400, errors.New("file does not exist"))
}
content, err := ioutil.ReadFile(path)
if err != nil {
logrus.Error("Failed to read AlertingRules config file: ", err.Error())
return nil, util.CreateAPIHandleError(400, err)
}
var rulesConfig model.AlertingNameConfig
if err := yaml.Unmarshal(content, &rulesConfig); err != nil {
logrus.Error("Unmarshal AlertingRulesConfig config string to object error.", err.Error())
return nil, util.CreateAPIHandleError(400, err)
}
var decode utilhttp.ResponseBody
body, err := json.Marshal(rulesConfig)
if err != nil {
return nil, util.CreateAPIHandleError(400, err)
}
code, err := m.DoRequest(m.prefix+"/"+ruleName, "PUT", bytes.NewBuffer(body), &decode)
if err != nil {
println("====err>",code,err)
return nil, handleErrAndCode(err, code)
}
if code != 200 {
logrus.Error("Return failure message ", decode.Bean)
return nil, util.CreateAPIHandleError(code, fmt.Errorf("add alerting rules error code %d", code))
}
return &decode, nil
}

View File

@ -55,6 +55,7 @@ type Region interface {
Cluster() ClusterInterface
Configs() ConfigsInterface
Version() string
Monitor() MonitorInterface
DoRequest(path, method string, body io.Reader, decode *utilhttp.ResponseBody) (int, error)
}

View File

@ -285,9 +285,9 @@ func (i *SourceCodeBuildItem) buildImage() error {
tag := i.DeployVersion
buildImageName := strings.ToLower(fmt.Sprintf("%s/%s:%s", REGISTRYDOMAIN, name, tag))
args := make(map[string]string, 5)
for k, v := range args {
if strings.Contains(k, "BUILD_ARG_") {
args[k] = v
for k, v := range i.BuildEnvs {
if ks := strings.Split(k, "ARG_"); len(ks) > 1 {
args[ks[1]] = v
}
}
buildOptions := types.ImageBuildOptions{

View File

@ -405,13 +405,15 @@ func (d *SourceCodeParse) parseDockerfileInfo(dockerfile string) bool {
length := len(cm.Value)
for i := 0; i < length; i++ {
if kv := strings.Split(cm.Value[i], "="); len(kv) > 1 {
d.envs[kv[0]] = &Env{Name: kv[0], Value: kv[1]}
key := "BUILD_ARG_" + kv[0]
d.envs[key] = &Env{Name: key, Value: kv[1]}
} else {
if i + 1 >= length {
logrus.Error("Parse ARG format error at ", cm.Value[1])
continue
}
d.envs[cm.Value[i]] = &Env{Name: cm.Value[i], Value: cm.Value[i+1]}
key := "BUILD_ARG_" + cm.Value[i]
d.envs[key] = &Env{Name: key, Value: cm.Value[i+1]}
i++
}
}

View File

@ -38,6 +38,7 @@ import (
"github.com/Sirupsen/logrus"
"github.com/goodrain/rainbond/eventlog/db"
"github.com/spf13/pflag"
"github.com/goodrain/rainbond/util"
)
type LogServer struct {
@ -241,8 +242,15 @@ func (s *LogServer) Run() error {
}
defer udpkeepalive.Stop()
hostID, err := util.ReadHostID(s.Conf.Cluster.Discover.NodeIDFile)
if err != nil {
return err
}
id := hostID[len(hostID)-12:]
httpkeepalive, err := discover.CreateKeepAlive(s.Conf.Cluster.Discover.EtcdAddr, "event_log_event_http",
s.Conf.Cluster.Discover.InstanceIP, s.Conf.Cluster.Discover.InstanceIP, s.Conf.WebSocket.BindPort)
id, s.Conf.Cluster.Discover.InstanceIP, s.Conf.WebSocket.BindPort)
if err != nil {
return err
}

View File

@ -41,7 +41,7 @@ func main() {
c.CompleteConfig()
// start prometheus daemon and watching tis status in all time, exit monitor process if start failed
a := prometheus.NewRulesManager()
a := prometheus.NewRulesManager(c)
p := prometheus.NewManager(c, a)
controllerManager := controller.NewControllerManager(a,p)

View File

@ -38,6 +38,8 @@ type Config struct {
StartArgs []string
ConfigFile string
AlertingRulesFile string
AlertManagerUrl string
LocalStoragePath string
Web Web
Tsdb Tsdb
@ -96,6 +98,8 @@ func NewConfig() *Config {
LogLevel: "info",
ConfigFile: "/etc/prometheus/prometheus.yml",
AlertingRulesFile: "/etc/prometheus/rules.yml",
AlertManagerUrl: "",
LocalStoragePath: "/prometheusdata",
WebTimeout: "5m",
RemoteFlushDeadline: "1m",
@ -128,6 +132,10 @@ func (c *Config) AddFlag(cmd *pflag.FlagSet) {
func (c *Config) AddPrometheusFlag(cmd *pflag.FlagSet) {
cmd.StringVar(&c.ConfigFile, "config.file", c.ConfigFile, "Prometheus configuration file path.")
cmd.StringVar(&c.AlertManagerUrl, "alertmanager.url", c.AlertManagerUrl, "AlertManager url.")
cmd.StringVar(&c.AlertingRulesFile, "rules-config.file", c.AlertingRulesFile, "Prometheus alerting rules config file path.")
cmd.StringVar(&c.Web.ListenAddress, "web.listen-address", c.Web.ListenAddress, "Address to listen on for UI, API, and telemetry.")
cmd.StringVar(&c.WebTimeout, "web.read-timeout", c.WebTimeout, "Maximum duration before timing out read of the request, and closing idle connections.")
@ -205,6 +213,9 @@ func (c *Config) CompleteConfig() {
if c.Web.EnableLifecycle {
defaultOptions += " --web.enable-lifecycle"
}
if c.AlertManagerUrl != "" {
defaultOptions += " --alertmanager.url="+c.AlertManagerUrl
}
args := strings.Split(defaultOptions, " ")
c.StartArgs = append(c.StartArgs, os.Args[0])

View File

@ -25,7 +25,7 @@ import (
"syscall"
"github.com/goodrain/rainbond/cmd/webcli/option"
"github.com/goodrain/rainbond/discover"
discover "github.com/goodrain/rainbond/discover.v2"
"github.com/goodrain/rainbond/webcli/app"
"github.com/Sirupsen/logrus"

View File

@ -55,6 +55,7 @@ func getClusterInfo(c *cli.Context) error {
table.AddRow("DistributedDisk", fmt.Sprintf("%dGb/%dGb", clusterInfo.ReqDisk/1024/1024/1024, clusterInfo.CapDisk/1024/1024/1024),
fmt.Sprintf("%.2f", float32(clusterInfo.ReqDisk*100)/float32(clusterInfo.CapDisk))+"%")
fmt.Println(table)
//show services health status
list, err := clients.RegionClient.Nodes().List()
handleErr(err)
@ -111,7 +112,7 @@ func getServicesHealthy(nodes []*client.HostNode) (map[string][]map[string]strin
func summaryResult(list []map[string]string) (status string, errMessage string) {
upNum := 0
err := "N/A"
err := ""
for _, v := range list {
if v["type"] == "OutOfDisk" ||v["type"] == "DiskPressure"||v["type"] == "MemoryPressure"||v["type"] == "InstallNotReady"{
if v["status"] == "False" {
@ -130,9 +131,9 @@ func summaryResult(list []map[string]string) (status string, errMessage string)
}
}
if upNum == len(list){
status = "\033[0;37;42m" + strconv.Itoa(upNum) + "/" + strconv.Itoa(len(list)) + " \033[0m"
status = "\033[0;32;32m" + strconv.Itoa(upNum) + "/" + strconv.Itoa(len(list)) + " \033[0m"
}else {
status = "\033[0;37;41m " + strconv.Itoa(upNum) + "/" + strconv.Itoa(len(list)) + " \033[0m"
status = "\033[0;31;31m " + strconv.Itoa(upNum) + "/" + strconv.Itoa(len(list)) + " \033[0m"
}
errMessage = err

View File

@ -36,6 +36,7 @@ func GetCmds() []cli.Command {
cmds = append(cmds, NewCmdExec())
cmds = append(cmds, NewCmdInit())
cmds = append(cmds, NewCmdShow())
cmds = append(cmds, NewCmdAlerting())
//task相关命令
//cmds = append(cmds, NewCmdTasks())

View File

@ -19,23 +19,18 @@
package cmd
import (
"bufio"
"bytes"
"fmt"
"io/ioutil"
"net/http"
"os/exec"
"strings"
"time"
"github.com/Sirupsen/logrus"
"github.com/urfave/cli"
//"github.com/goodrain/rainbond/grctl/clients"
"github.com/goodrain/rainbond/api/util"
"os"
"github.com/goodrain/rainbond/builder/sources"
"github.com/goodrain/rainbond/grctl/clients"
"github.com/goodrain/rainbond/node/api/model"
coreutil "github.com/goodrain/rainbond/util"
)
//NewCmdInit grctl init
@ -49,7 +44,13 @@ func NewCmdInit() cli.Command {
},
cli.StringFlag{
Name: "type",
Usage: "node type:manage/compute, manage",
Usage: "node type: manage or compute",
Value: "manage",
},
cli.StringFlag{
Name: "work_dir",
Usage: "clone source code to the work directory",
Value: "/opt/rainbond/install",
},
cli.StringFlag{
Name: "mip",
@ -58,10 +59,12 @@ func NewCmdInit() cli.Command {
cli.StringFlag{
Name: "repo_ver",
Usage: "repo version,3.4",
Value: "master",
},
cli.StringFlag{
Name: "install_type",
Usage: "online/offline ,online",
Usage: "online or offline",
Value: "online",
},
cli.BoolFlag{
Name: "test",
@ -113,117 +116,44 @@ func NewCmdInstallStatus() cli.Command {
}
func initCluster(c *cli.Context) error {
url := "http://repo.goodrain.com/release/3.5/gaops/jobs/install/prepare/init.sh"
if c.Bool("test") {
url = "http://dev.repo.goodrain.com/gaops/jobs/install/prepare/init.sh"
// check if the rainbond is already installed
fmt.Println("Checking install enviremant.")
_, err := os.Stat("/tmp/rainbond.success")
if err == nil {
fmt.Println("Rainbond is already installed, if you whant reinstall, then please delete the file: /tmp/rainbond.success")
return nil
}
resp, err := http.Get(url)
if err != nil {
logrus.Errorf("error get init script,details %s", err.Error())
return err
// download source code from github if in online model
if c.String("install_type") == "online" {
fmt.Println("Download rainbond install package.")
csi := sources.CodeSourceInfo{
RepositoryURL: "https://github.com/goodrain/rainbond-install.git",
Branch: c.String("repo_ver"),
}
os.RemoveAll(c.String("work_dir"))
os.MkdirAll(c.String("work_dir"), 0755)
_, err := sources.GitClone(csi, c.String("work_dir"), nil, 5)
if err != nil {
println(err.Error())
return err
}
}
defer resp.Body.Close()
b, _ := ioutil.ReadAll(resp.Body)
args := []string{c.String("etcd"), c.String("type"), c.String("mip"), c.String("repo_ver"), c.String("install_type")}
arg := strings.Join(args, " ")
argCheck := strings.Join(args, "")
if len(argCheck) > 0 {
arg += ";"
} else {
arg = ""
}
fmt.Println("begin init cluster first node,please don't exit,wait install")
cmd := exec.Command("bash", "-c", arg+string(b))
var buffe bytes.Buffer
cmd.Stderr = &buffe
stdout, _ := cmd.StdoutPipe()
go func() {
read := bufio.NewReader(stdout)
for {
line, _, err := read.ReadLine()
if err != nil {
return
}
fmt.Println(string(line))
}
}()
if err := cmd.Run(); err != nil {
logrus.Errorf("current node init error,%s", err.Error())
return err
}
//检测并设置init的结果
result := buffe.String()
index := strings.Index(result, "{")
jsonOutPut := result
if index > -1 {
jsonOutPut = result[index:]
}
fmt.Println("Result:" + jsonOutPut)
output, err := model.ParseTaskOutPut(jsonOutPut)
// start setup script to install rainbond
fmt.Println("Begin init cluster first node,please don't exit,wait install")
cmd := exec.Command("bash", "-c", fmt.Sprintf("cd %s ; ./setup.sh %s %s", c.String("work_dir"), c.String("install_type"), c.String("repo_ver")))
cmd.Stdout = os.Stdout
cmd.Stderr = os.Stderr
err = cmd.Run()
if err != nil {
logrus.Errorf("get init current node result error:%s", err.Error())
return err
}
var newConfigs []model.ConfigUnit
if output.Global != nil {
for k, v := range output.Global {
if strings.Index(v, ",") > -1 {
values := strings.Split(v, ",")
coreutil.Deweight(&values)
newConfigs = append(newConfigs, model.ConfigUnit{
Name: strings.ToUpper(k),
Value: values,
ValueType: "array",
IsConfigurable: false,
})
} else {
newConfigs = append(newConfigs, model.ConfigUnit{
Name: strings.ToUpper(k),
Value: v,
ValueType: "string",
IsConfigurable: false,
})
}
}
}
var gc *model.GlobalConfig
var error *util.APIHandleError
for i := 0; i < 10; i++ {
time.Sleep(time.Second * 2)
gc, error = clients.RegionClient.Configs().Get()
if err == nil && gc != nil {
for _, nc := range newConfigs {
gc.Add(nc)
}
error = clients.RegionClient.Configs().Put(gc)
break
}
}
if error != nil {
logrus.Errorf("Update Datacenter configs error,please check node status")
return err
}
//获取当前节点ID
hostID, err := coreutil.ReadHostID("")
if err != nil {
logrus.Errorf("read nodeid error,please check node status")
println(err.Error())
return err
}
//error = clients.NodeClient.Tasks().Exec("check_manage_base_services", []string{hostID})
//if error != nil {
// logrus.Errorf("error exec task:%s,details %s", "check_manage_base_services", error.String())
// return error.Err
//}
error = clients.RegionClient.Tasks().Exec("check_manage_services", []string{hostID})
if error != nil {
logrus.Errorf("error exec task:%s,details %s", "check_manage_services", error.String())
return error.Err
}
//Status("check_manage_base_services", []string{hostID})
Status("check_manage_services", []string{hostID})
ioutil.WriteFile("/tmp/rainbond.success", []byte(c.String("repo_ver")), 0644)
fmt.Println("install manage node success,next you can :")
fmt.Println(" add compute node--grctl node add -h")
fmt.Println(" install compute node--grctl install compute -h")

111
grctl/cmd/monitor.go Normal file
View File

@ -0,0 +1,111 @@
package cmd
import (
"github.com/urfave/cli"
"github.com/Sirupsen/logrus"
"github.com/goodrain/rainbond/grctl/clients"
"fmt"
"github.com/ghodss/yaml"
"errors"
)
// NewCmdAlerting builds the "grctl alerting" command, whose subcommands
// manage alerting rules through clients.RegionClient.Monitor():
//
//	get <rule_name>   print one rule group as YAML
//	list              print all rule groups as YAML
//	del <rule_name>   delete a rule group
//	add <file>        create a rule group from a YAML file
//	modify            replace the rule group named by --RulesName with the
//	                  contents of the file given by --RulesPath
func NewCmdAlerting() cli.Command {
	c := cli.Command{
		Name:  "alerting",
		Usage: "监控报警。grctl alerting",
		Subcommands: []cli.Command{
			{
				Name:  "get",
				Usage: "get rule_name",
				Action: func(c *cli.Context) error {
					Common(c)
					name := c.Args().First()
					if name == "" {
						logrus.Errorf("need args")
						return nil
					}
					v, err := clients.RegionClient.Monitor().GetRule(name)
					handleErr(err)
					// Surface encoding failures instead of printing an empty line.
					rule, marshalErr := yaml.Marshal(v)
					if marshalErr != nil {
						return marshalErr
					}
					fmt.Println(string(rule))
					return nil
				},
			},
			{
				Name:  "list",
				Usage: "list",
				Action: func(c *cli.Context) error {
					Common(c)
					list, err := clients.RegionClient.Monitor().GetAllRule()
					handleErr(err)
					// Surface encoding failures instead of printing an empty line.
					ruleList, marshalErr := yaml.Marshal(list)
					if marshalErr != nil {
						return marshalErr
					}
					fmt.Println(string(ruleList))
					return nil
				},
			},
			{
				Name:  "del",
				Usage: "del rule_name",
				Action: func(c *cli.Context) error {
					Common(c)
					name := c.Args().First()
					if name == "" {
						logrus.Errorf("need args")
						return nil
					}
					_, err := clients.RegionClient.Monitor().DelRule(name)
					handleErr(err)
					fmt.Println("Delete rule succeeded")
					return nil
				},
			},
			{
				Name:  "add",
				Usage: "add FilePath",
				Action: func(c *cli.Context) error {
					Common(c)
					filePath := c.Args().First()
					if filePath == "" {
						logrus.Errorf("need args")
						return nil
					}
					_, err := clients.RegionClient.Monitor().AddRule(filePath)
					handleErr(err)
					fmt.Println("Add rule successfully")
					return nil
				},
			},
			{
				Name:  "modify",
				Usage: "modify 修改规则",
				Flags: []cli.Flag{
					cli.StringFlag{
						Name:  "RulesName,rn",
						Value: "",
						Usage: "RulesName",
					},
					cli.StringFlag{
						Name:  "RulesPath,rp",
						Value: "",
						Usage: "RulesPath",
					},
				},
				Action: func(c *cli.Context) error {
					Common(c)
					// Both flags are required; bail out before touching the API.
					if !c.IsSet("RulesName") || !c.IsSet("RulesPath") {
						return errors.New("both --RulesName and --RulesPath must be set")
					}
					path := c.String("RulesPath")
					ruleName := c.String("RulesName")
					_, err := clients.RegionClient.Monitor().RegRule(ruleName, path)
					handleErr(err)
					fmt.Println("Modify rule successfully")
					return nil
				},
			},
		},
	}
	return c
}

View File

@ -102,9 +102,9 @@ func fileExist(path string) bool {
func handleStatus(serviceTable *termtables.Table, ready bool, v *client.HostNode) {
var formatReady string
if ready == false{
formatReady = "\033[0;37;41m false \033[0m"
formatReady = "\033[0;31;31m false \033[0m"
}else {
formatReady = "\033[0;37;42m true \033[0m"
formatReady = "\033[0;32;32m true \033[0m"
}
if v.Role.HasRule("compute") && !v.Role.HasRule("manage") {
serviceTable.AddRow(v.ID, v.InternalIP, v.HostName, v.Role.String(), v.Mode, v.Status, v.Alived, !v.Unschedulable, formatReady)
@ -125,12 +125,12 @@ func handleResult(serviceTable *termtables.Table, v *client.HostNode) {
var formatReady string
if v.Status == client.ConditionFalse{
if v.Type == client.OutOfDisk || v.Type == client.MemoryPressure || v.Type==client.DiskPressure ||v.Type==client.InstallNotReady{
formatReady = "\033[0;37;42m false \033[0m"
formatReady = "\033[0;32;32m false \033[0m"
}else {
formatReady = "\033[0;37;41m false \033[0m"
formatReady = "\033[0;31;31m false \033[0m"
}
}else {
formatReady = "\033[0;37;42m true \033[0m"
formatReady = "\033[0;32;32m true \033[0m"
}
serviceTable.AddRow(string(v.Type), formatReady, handleMessage(string(v.Status), v.Message))
}
@ -141,11 +141,11 @@ func extractReady(serviceTable *termtables.Table, v *client.HostNode, name strin
if string(v.Type) == name{
var formatReady string
if v.Status == client.ConditionFalse{
formatReady = "\033[0;37;41m false \033[0m"
formatReady = "\033[0;31;31m false \033[0m"
}else {
formatReady = "\033[0;37;42m true \033[0m"
formatReady = "\033[0;32;32m true \033[0m"
}
serviceTable.AddRow(string(v.Type), formatReady, handleMessage(string(v.Status), v.Message))
serviceTable.AddRow("\033[0;33;33m "+string(v.Type)+" \033[0m", formatReady, handleMessage(string(v.Status), v.Message))
}
}
}

View File

@ -6,9 +6,9 @@ import (
httputil "github.com/goodrain/rainbond/util/http"
"github.com/Sirupsen/logrus"
"gopkg.in/yaml.v2"
"github.com/goodrain/rainbond/monitor/prometheus"
"github.com/go-chi/chi"
"encoding/json"
)
type ControllerManager struct {
@ -25,42 +25,34 @@ func NewControllerManager(a *prometheus.AlertingRulesManager, p *prometheus.Mana
}
func (c *ControllerManager) AddRules(w http.ResponseWriter, r *http.Request) {
logrus.Info("add rules")
in, err := ioutil.ReadAll(r.Body)
if err != nil {
httputil.ReturnError(r, w, 400, err.Error())
return
}
println(string(in))
var RulesConfig prometheus.AlertingNameConfig
err = ioutil.WriteFile("/etc/prometheus/cache_rule.yml", in, 0644)
if err != nil {
logrus.Error(err.Error())
}
content, err := ioutil.ReadFile("/etc/prometheus/cache_rule.yml")
if err != nil {
logrus.Error( err)
}
if err := yaml.Unmarshal(content, &RulesConfig); err != nil {
logrus.Error("Unmarshal prometheus alerting rules config string to object error.", err.Error())
unmarshalErr := json.Unmarshal(in, &RulesConfig)
if unmarshalErr != nil {
logrus.Info(unmarshalErr)
httputil.ReturnError(r, w, 400, err.Error())
return
}
c.Rules.RulesConfig.LoadAlertingRulesConfig()
c.Rules.LoadAlertingRulesConfig()
group := c.Rules.RulesConfig.Groups
for _,v := range group{
if v.Name == RulesConfig.Name{
for _, v := range group {
if v.Name == RulesConfig.Name {
httputil.ReturnError(r, w, 400, "Rule already exists")
return
}
}
group = append(group, &RulesConfig)
c.Rules.RulesConfig.SaveAlertingRulesConfig()
c.Rules.RulesConfig.Groups = group
c.Rules.SaveAlertingRulesConfig()
c.Manager.RestartDaemon()
httputil.ReturnSuccess(r, w, "Add rule successfully")
@ -69,28 +61,29 @@ func (c *ControllerManager) AddRules(w http.ResponseWriter, r *http.Request) {
func (c *ControllerManager) GetRules(w http.ResponseWriter, r *http.Request) {
logrus.Infof("get rule")
rulesName := chi.URLParam(r, "rules_name")
c.Rules.RulesConfig.LoadAlertingRulesConfig()
c.Rules.LoadAlertingRulesConfig()
for _, v := range c.Rules.RulesConfig.Groups {
if v.Name == rulesName {
res := v.Rules
httputil.ReturnSuccess(r, w, res)
httputil.ReturnSuccess(r, w, v)
return
}
}
httputil.ReturnError(r, w, 400, "Rule does not exist")
httputil.ReturnError(r, w, 404, "Rule does not exist")
}
func (c *ControllerManager) DelRules(w http.ResponseWriter, r *http.Request) {
logrus.Infof("delete rule")
rulesName := chi.URLParam(r, "rules_name")
c.Rules.RulesConfig.LoadAlertingRulesConfig()
c.Rules.LoadAlertingRulesConfig()
groupsList := c.Rules.RulesConfig.Groups
for i, v := range groupsList {
if v.Name == rulesName {
groupsList = append(groupsList[:i],groupsList[i+1:]...)
c.Rules.RulesConfig.SaveAlertingRulesConfig()
groupsList = append(groupsList[:i], groupsList[i+1:]...)
c.Rules.RulesConfig.Groups = groupsList
c.Rules.SaveAlertingRulesConfig()
c.Manager.RestartDaemon()
httputil.ReturnSuccess(r, w, "successfully deleted")
return
}
@ -98,42 +91,42 @@ func (c *ControllerManager) DelRules(w http.ResponseWriter, r *http.Request) {
httputil.ReturnSuccess(r, w, "")
}
func (c *ControllerManager) RegRules(w http.ResponseWriter, r *http.Request) {
rulesName := chi.URLParam(r, "rules_name")
in, err := ioutil.ReadAll(r.Body)
if err != nil {
httputil.ReturnError(r, w, 400, err.Error())
return
}
println(string(in))
var RulesConfig prometheus.AlertingNameConfig
err = ioutil.WriteFile("/etc/prometheus/cache_rule.yml", in, 0644)
if err != nil {
logrus.Error(err.Error())
}
content, err := ioutil.ReadFile("/etc/prometheus/cache_rule.yml")
if err != nil {
logrus.Error( err)
}
if err := yaml.Unmarshal(content, &RulesConfig); err != nil {
logrus.Error("Unmarshal prometheus alerting rules config string to object error.", err.Error())
unmarshalErr := json.Unmarshal(in, &RulesConfig)
if unmarshalErr != nil {
logrus.Info(unmarshalErr)
httputil.ReturnError(r, w, 400, err.Error())
return
}
c.Rules.RulesConfig.LoadAlertingRulesConfig()
c.Rules.LoadAlertingRulesConfig()
group := c.Rules.RulesConfig.Groups
for i,v := range group{
if v.Name == RulesConfig.Name{
for i, v := range group {
if v.Name == rulesName {
group[i] = &RulesConfig
c.Manager.RestartDaemon()
httputil.ReturnSuccess(r, w, "Update rule succeeded")
c.Rules.RulesConfig.SaveAlertingRulesConfig()
c.Rules.SaveAlertingRulesConfig()
return
}
}
httputil.ReturnError(r, w, 400,"The rule to be updated does not exist")
httputil.ReturnError(r, w, 404, "The rule to be updated does not exist")
}
// GetAllRules reloads the alerting rules configuration and writes the whole
// configuration back to the caller as a success response.
func (c *ControllerManager) GetAllRules(w http.ResponseWriter, r *http.Request) {
	logrus.Infof("get all rule")

	rules := c.Rules
	rules.LoadAlertingRulesConfig()
	httputil.ReturnSuccess(r, w, rules.RulesConfig)
}

View File

@ -32,16 +32,16 @@ func APIServer(c *controller.ControllerManager) *chi.Mux {
r := chi.NewRouter()
r.Route("/monitor", func(r chi.Router) {
r.Get("/health", func(w http.ResponseWriter, r *http.Request) {
bean := map[string]string{"status":"health","info":"monitor service health"}
bean := map[string]string{"status": "health", "info": "monitor service health"}
httputil.ReturnSuccess(r, w, bean)
})
})
r.Route("/rules", func(r chi.Router) {
r.Post("/", c.AddRules)
r.Put("/", c.RegRules)
r.Delete("/{rules_name}", c.DelRules)
r.Get("/{rules_name}", c.GetRules)
r.Route("/v2/rules", func(r chi.Router) {
r.Post("/", c.AddRules)
r.Put("/{rules_name}", c.RegRules)
r.Delete("/{rules_name}", c.DelRules)
r.Get("/{rules_name}", c.GetRules)
r.Get("/all", c.GetAllRules)
})
util.ProfilerSetup(r)
return r

View File

@ -74,7 +74,7 @@ func NewManager(config *option.Config, a *AlertingRulesManager) *Manager {
ScrapeInterval: model.Duration(time.Second * 5),
EvaluationInterval: model.Duration(time.Second * 30),
},
RuleFiles: []string{"/etc/prometheus/rules.yml"},
RuleFiles: []string{config.AlertingRulesFile},
},
Registry: reg,
httpClient: client,
@ -82,7 +82,7 @@ func NewManager(config *option.Config, a *AlertingRulesManager) *Manager {
a: a,
}
m.LoadConfig()
m.a.RulesConfig.InitRulesConfig()
m.a.InitRulesConfig()
return m
}

View File

@ -5,75 +5,171 @@ import (
"io/ioutil"
"gopkg.in/yaml.v2"
"os"
"github.com/goodrain/rainbond/cmd/monitor/option"
)
type AlertingRulesConfig struct {
Groups []*AlertingNameConfig `yaml:"groups"`
Groups []*AlertingNameConfig `yaml:"groups" json:"groups"`
}
type AlertingNameConfig struct {
Name string `yaml:"name"`
Rules []*RulesConfig `yaml:"rules"`
Name string `yaml:"name" json:"name"`
Rules []*RulesConfig `yaml:"rules" json:"rules"`
}
type RulesConfig struct {
Alert string `yaml:"alert"`
Expr string `yaml:"expr"`
For string `yaml:"for"`
Labels map[string]string `yaml:"labels"`
Annotations map[string]string `yaml:"annotations"`
Alert string `yaml:"alert" json:"alert"`
Expr string `yaml:"expr" json:"expr"`
For string `yaml:"for" json:"for"`
Labels map[string]string `yaml:"labels" json:"labels"`
Annotations map[string]string `yaml:"annotations" json:"annotations"`
}
// AlertingRulesManager holds the alerting rules configuration together with
// the monitor options that tell it where the rules file lives.
type AlertingRulesManager struct {
	// RulesConfig is the set of alerting rule groups being managed.
	RulesConfig *AlertingRulesConfig
	// config is the monitor configuration; its AlertingRulesFile field is the
	// path the rules are read from and written to.
	config *option.Config
}
func NewRulesManager() *AlertingRulesManager {
a:= &AlertingRulesManager{
func NewRulesManager(config *option.Config) *AlertingRulesManager {
a := &AlertingRulesManager{
RulesConfig: &AlertingRulesConfig{
Groups:[]*AlertingNameConfig{
Groups: []*AlertingNameConfig{
&AlertingNameConfig{
Name: "test",
Name: "InstanceHealth",
Rules: []*RulesConfig{
&RulesConfig{
Alert: "MqHealth",
Expr: "acp_mq_exporter_health_status{job='mq'} < 1",
For: "2m",
Labels: map[string]string{"service_name": "mq"},
Annotations: map[string]string{"summary": "unhealthy"},
Alert: "InstanceDown",
Expr: "up == 0",
For: "3m",
Labels: map[string]string{},
Annotations: map[string]string{"summary": "builder {{$labels.instance}} down", "description":"{{$labels.instance}} of job {{$labels.job}} has been down for more than 3 minutes"},
},
},
},
&AlertingNameConfig{
Name: "test2",
Name: "BuilderHealth",
Rules: []*RulesConfig{
&RulesConfig{
Alert: "builderHealth",
Expr: "acp_mq_exporter_health_status{job='mq'} < 1",
For: "5m",
Labels: map[string]string{"service_name": "builder"},
Annotations: map[string]string{"summary": "unhealthy"},
Alert: "BuilderUnhealthy",
Expr: "builder_exporter_health_status == 0",
For: "3m",
Labels: map[string]string{},
Annotations: map[string]string{"summary": "builder unhealthy"},
},
&RulesConfig{
Alert: "BuilderTaskError",
Expr: "builder_exporter_builder_task_error > 30",
For: "3m",
Labels: map[string]string{},
Annotations: map[string]string{"summary": "Builder execution task error number is greater than 30"},
},
},
},
&AlertingNameConfig{
Name: "WorkerHealth",
Rules: []*RulesConfig{
&RulesConfig{
Alert: "WorkerUnhealthy",
Expr: "app_resource_exporter_health_status == 0",
For: "3m",
Labels: map[string]string{},
Annotations: map[string]string{"summary": "worker unhealthy"},
},
&RulesConfig{
Alert: "WorkerTaskError",
Expr: "app_resource_exporter_worker_task_error > 50",
For: "3m",
Labels: map[string]string{},
Annotations: map[string]string{"summary": "worker execution task error number is greater than 50"},
},
},
},
&AlertingNameConfig{
Name: "EntranceHealth",
Rules: []*RulesConfig{
&RulesConfig{
Alert: "EntranceUnHealthy",
Expr: "acp_entrance_exporter_health_status == 0",
For: "3m",
Labels: map[string]string{},
Annotations: map[string]string{"summary": "entrance unhealthy"},
},
},
},
&AlertingNameConfig{
Name: "MqHealth",
Rules: []*RulesConfig{
&RulesConfig{
Alert: "MqUnhealthy",
Expr: "acp_mq_exporter_health_status == 0",
For: "3m",
Labels: map[string]string{},
Annotations: map[string]string{"summary": "mq unhealthy"},
},
&RulesConfig{
Alert: "TeamTaskMany",
Expr: "acp_mq_dequeue_number-acp_mq_enqueue_number > 200",
For: "3m",
Labels: map[string]string{},
Annotations: map[string]string{"summary": "The number of tasks in the queue is greater than 200"},
},
},
},
&AlertingNameConfig{
Name: "EventlogHealth",
Rules: []*RulesConfig{
&RulesConfig{
Alert: "EventLogUnhealthy",
Expr: "event_log_exporter_health_status == 0",
For: "3m",
Labels: map[string]string{},
Annotations: map[string]string{"summary": "eventlog unhealthy"},
},
&RulesConfig{
Alert: "EventLogDown",
Expr: "event_log_exporter_instanse_up == 0",
For: "3m",
Labels: map[string]string{},
Annotations: map[string]string{"summary": "eventlog service down"},
},
},
},
&AlertingNameConfig{
Name: "WebcliHealth",
Rules: []*RulesConfig{
&RulesConfig{
Alert: "WebcliUnhealthy",
Expr: "webcli_exporter_health_status == 0",
For: "3m",
Labels: map[string]string{},
Annotations: map[string]string{"summary": "webcli unhealthy"},
},
},
},
},
},
config: config,
}
return a
}
func (a *AlertingRulesConfig)LoadAlertingRulesConfig() error {
func (a *AlertingRulesManager) LoadAlertingRulesConfig() error {
logrus.Info("Load AlertingRules config file.")
content, err := ioutil.ReadFile("/etc/prometheus/rules.yml")
content, err := ioutil.ReadFile(a.config.AlertingRulesFile)
if err != nil {
logrus.Error("Failed to read AlertingRules config file: ", err)
logrus.Info("Init config file by default values.")
return nil
}
if err := yaml.Unmarshal(content, a); err != nil {
if err := yaml.Unmarshal(content, a.RulesConfig); err != nil {
logrus.Error("Unmarshal AlertingRulesConfig config string to object error.", err.Error())
return err
}
@ -82,17 +178,16 @@ func (a *AlertingRulesConfig)LoadAlertingRulesConfig() error {
return nil
}
func (a *AlertingRulesConfig)SaveAlertingRulesConfig() error {
func (a *AlertingRulesManager) SaveAlertingRulesConfig() error {
logrus.Debug("Save alerting rules config file.")
data, err := yaml.Marshal(a)
data, err := yaml.Marshal(a.RulesConfig)
if err != nil {
logrus.Error("Marshal alerting rules config to yaml error.", err.Error())
return err
}
err = ioutil.WriteFile("/etc/prometheus/rules.yml", data, 0644)
err = ioutil.WriteFile(a.config.AlertingRulesFile, data, 0644)
if err != nil {
logrus.Error("Write alerting rules config file error.", err.Error())
return err
@ -101,15 +196,14 @@ func (a *AlertingRulesConfig)SaveAlertingRulesConfig() error {
return nil
}
func (a *AlertingRulesConfig) AddRules(val AlertingNameConfig) error {
group := a.Groups
// AddRules appends the rule group val to the in-memory rules configuration.
//
// The group is stored on a.RulesConfig.Groups directly: appending to a local
// copy of the slice would leave the manager's configuration unchanged once
// the function returns. Nothing is written to the rules file here.
// It always returns nil.
func (a *AlertingRulesManager) AddRules(val AlertingNameConfig) error {
	a.RulesConfig.Groups = append(a.RulesConfig.Groups, &val)
	return nil
}
func (a *AlertingRulesConfig) InitRulesConfig() {
_, err := os.Stat("/etc/prometheus/rules.yml") //os.Stat获取文件信息
func (a *AlertingRulesManager) InitRulesConfig() {
_, err := os.Stat(a.config.AlertingRulesFile) //os.Stat获取文件信息
if err != nil {
if os.IsExist(err) {
return
@ -119,4 +213,4 @@ func (a *AlertingRulesConfig) InitRulesConfig() {
}
return
}
}

View File

@ -439,3 +439,20 @@ type NodeDetails struct {
AllocatedResources map[string]string `json:"allocatedresources"`
Events map[string][]string `json:"events"`
}
// AlertingRulesConfig is the top-level alerting rules document: a list of
// named rule groups, serialized under the "groups" key in both YAML and JSON.
type AlertingRulesConfig struct {
	// Groups lists the rule groups in the order they appear in the document.
	Groups []*AlertingNameConfig `yaml:"groups" json:"groups"`
}
// AlertingNameConfig is one named group of alerting rules, serialized with
// the keys "name" and "rules" in both YAML and JSON.
type AlertingNameConfig struct {
	// Name identifies the group.
	Name string `yaml:"name" json:"name"`
	// Rules holds the alerting rules that belong to this group.
	Rules []*RulesConfig `yaml:"rules" json:"rules"`
}
// RulesConfig describes a single alerting rule. Every field is serialized
// under its lower-case name in both YAML and JSON.
type RulesConfig struct {
	// Alert is the name of the alert.
	Alert string `yaml:"alert" json:"alert"`
	// Expr is the expression evaluated for the alert.
	Expr string `yaml:"expr" json:"expr"`
	// For is a duration string; presumably how long Expr must hold before
	// the alert fires (confirm against the Prometheus rule format).
	For string `yaml:"for" json:"for"`
	// Labels are extra key/value labels attached to the rule.
	Labels map[string]string `yaml:"labels" json:"labels"`
	// Annotations are free-form key/value annotations attached to the rule.
	Annotations map[string]string `yaml:"annotations" json:"annotations"`
}