[REV] change node code architecture

This commit is contained in:
barnettZQG 2018-07-13 10:11:10 +08:00
parent efc078cc38
commit 6dc588012d
18 changed files with 1368 additions and 7 deletions

View File

@ -102,7 +102,7 @@ type Conf struct {
DBType string
DBConnectionInfo string
TTL int64 // 节点超时时间,单位秒
TTL int64 // node heartbeat to master TTL
ReqTimeout int // 请求超时时间,单位秒
// 执行任务信息过期时间,单位秒
// 0 为不过期

View File

@ -55,6 +55,12 @@ const (
KindInterval // 一个任务执行间隔内允许执行一次
)
//Event JobEvent
type Event struct {
EventType string `json:"event_type"`
Job Job `json:"job"`
}
//Job 需要执行的任务
type Job struct {
ID string `json:"id"`

View File

@ -0,0 +1,19 @@
// RAINBOND, Application Management Platform
// Copyright (C) 2014-2017 Goodrain Co., Ltd.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version. For any non-GPL usage of Rainbond,
// one or multiple Commercial Licenses authorized by Goodrain Co., Ltd.
// must be obtained first.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
package kubecache

View File

@ -43,7 +43,7 @@ import (
"github.com/goodrain/rainbond/node/core/job"
"github.com/goodrain/rainbond/node/core/store"
"github.com/goodrain/rainbond/node/masterserver/node"
"github.com/goodrain/rainbond/node/nodeserver"
"github.com/goodrain/rainbond/node/nodem/taskrun"
"github.com/pquerna/ffjson/ffjson"
)
@ -55,7 +55,7 @@ type TaskEngine struct {
cancel context.CancelFunc
config *option.Conf
tasks map[string]*model.Task
jobs nodeserver.Jobs
jobs taskrun.Jobs
tasksLock, jobsLock sync.Mutex
dataCenterConfig *config.DataCenterConfig
nodeCluster *node.Cluster
@ -73,7 +73,7 @@ func CreateTaskEngine(nodeCluster *node.Cluster, node *model.HostNode) *TaskEngi
ctx: ctx,
cancel: cancel,
tasks: make(map[string]*model.Task),
jobs: make(nodeserver.Jobs),
jobs: make(taskrun.Jobs),
config: option.Config,
dataCenterConfig: config.GetDataCenterConfig(),
nodeCluster: nodeCluster,

View File

@ -0,0 +1,68 @@
// RAINBOND, Application Management Platform
// Copyright (C) 2014-2017 Goodrain Co., Ltd.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version. For any non-GPL usage of Rainbond,
// one or multiple Commercial Licenses authorized by Goodrain Co., Ltd.
// must be obtained first.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
package client
import (
"github.com/coreos/etcd/clientv3"
"github.com/goodrain/rainbond/node/core/config"
"github.com/goodrain/rainbond/node/core/job"
)
//ClusterClient ClusterClient
type ClusterClient interface {
UpdateStatus(*HostNode) error
GetMasters() ([]*HostNode, error)
GetNode(nodeID string) (*HostNode, error)
GetDataCenterConfig() (*config.DataCenterConfig, error)
WatchJobs() <-chan *job.Event
//WatchTasks()
//UpdateTask()
}
//NewClusterClient new cluster client
func NewClusterClient(etcdClient *clientv3.Client) ClusterClient {
return &etcdClusterClient{
etcdClient: etcdClient,
}
}
type etcdClusterClient struct {
etcdClient *clientv3.Client
}
func (e *etcdClusterClient) UpdateStatus(*HostNode) error {
return nil
}
func (e *etcdClusterClient) GetMasters() ([]*HostNode, error) {
return nil, nil
}
func (e *etcdClusterClient) GetDataCenterConfig() (*config.DataCenterConfig, error) {
return nil, nil
}
func (e *etcdClusterClient) WatchJobs() <-chan *job.Event {
return nil
}
func (e *etcdClusterClient) GetNode(nodeID string) (*HostNode, error) {
return nil, nil
}

331
node/nodem/client/node.go Normal file
View File

@ -0,0 +1,331 @@
// RAINBOND, Application Management Platform
// Copyright (C) 2014-2017 Goodrain Co., Ltd.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version. For any non-GPL usage of Rainbond,
// one or multiple Commercial Licenses authorized by Goodrain Co., Ltd.
// must be obtained first.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
package client
import (
"strings"
"time"
"k8s.io/client-go/pkg/api/v1"
"github.com/Sirupsen/logrus"
client "github.com/coreos/etcd/clientv3"
"github.com/coreos/etcd/mvcc/mvccpb"
conf "github.com/goodrain/rainbond/cmd/node/option"
store "github.com/goodrain/rainbond/node/core/store"
"github.com/pquerna/ffjson/ffjson"
)
//APIHostNode api host node
type APIHostNode struct {
ID string `json:"uuid" validate:"uuid"`
HostName string `json:"host_name" validate:"host_name"`
InternalIP string `json:"internal_ip" validate:"internal_ip|ip"`
ExternalIP string `json:"external_ip" validate:"external_ip|ip"`
RootPass string `json:"root_pass,omitempty"`
Role []string `json:"role" validate:"role|required"`
Labels map[string]string `json:"labels"`
}
//Clone Clone
func (a APIHostNode) Clone() *HostNode {
hn := &HostNode{
ID: a.ID,
HostName: a.HostName,
InternalIP: a.InternalIP,
ExternalIP: a.ExternalIP,
RootPass: a.RootPass,
Role: a.Role,
Labels: a.Labels,
}
return hn
}
//HostNode rainbond node entity
type HostNode struct {
ID string `json:"uuid"`
HostName string `json:"host_name"`
CreateTime time.Time `json:"create_time"`
InternalIP string `json:"internal_ip"`
ExternalIP string `json:"external_ip"`
RootPass string `json:"root_pass,omitempty"`
KeyPath string `json:"key_path,omitempty"` //管理节点key文件路径
AvailableMemory int64 `json:"available_memory"`
AvailableCPU int64 `json:"available_cpu"`
Mode string `json:"mode"`
Role HostRule `json:"role"` //节点属性 compute manage storage
Status string `json:"status"` //节点状态 create,init,running,stop,delete
Labels map[string]string `json:"labels"` //节点标签 内置标签+用户自定义标签
Unschedulable bool `json:"unschedulable"` //不可调度
NodeStatus *NodeStatus `json:"node_status,omitempty"`
ClusterNode
}
//NodeStatus node status
type NodeStatus struct {
Status string `json:"status"`
Conditions []NodeCondition `json:"conditions,omitempty"`
NodeInfo NodeSystemInfo `json:"nodeInfo,omitempty" protobuf:"bytes,7,opt,name=nodeInfo"`
}
// NodeSystemInfo is a set of ids/uuids to uniquely identify the node.
type NodeSystemInfo struct {
// MachineID reported by the node. For unique machine identification
// in the cluster this field is preferred. Learn more from man(5)
// machine-id: http://man7.org/linux/man-pages/man5/machine-id.5.html
MachineID string `json:"machineID"`
// SystemUUID reported by the node. For unique machine identification
// MachineID is preferred. This field is specific to Red Hat hosts
// https://access.redhat.com/documentation/en-US/Red_Hat_Subscription_Management/1/html/RHSM/getting-system-uuid.html
SystemUUID string `json:"systemUUID"`
// Boot ID reported by the node.
BootID string `json:"bootID" protobuf:"bytes,3,opt,name=bootID"`
// Kernel Version reported by the node from 'uname -r' (e.g. 3.16.0-0.bpo.4-amd64).
KernelVersion string `json:"kernelVersion" `
// OS Image reported by the node from /etc/os-release (e.g. Debian GNU/Linux 7 (wheezy)).
OSImage string `json:"osImage"`
// ContainerRuntime Version reported by the node through runtime remote API (e.g. docker://1.5.0).
ContainerRuntimeVersion string `json:"containerRuntimeVersion"`
// The Operating System reported by the node
OperatingSystem string `json:"operatingSystem"`
// The Architecture reported by the node
Architecture string `json:"architecture"`
MemorySize uint32 `json:"memorySize"`
}
//Decode decode node info
func (n *HostNode) Decode(data []byte) error {
if err := ffjson.Unmarshal(data, n); err != nil {
logrus.Error("decode node info error:", err.Error())
return err
}
return nil
}
type NodeList []*HostNode
func (list NodeList) Len() int {
return len(list)
}
func (list NodeList) Less(i, j int) bool {
if list[i].InternalIP < list[j].InternalIP {
return true
} else {
return false
}
}
func (list NodeList) Swap(i, j int) {
var temp = list[i]
list[i] = list[j]
list[j] = temp
}
//GetNodeFromKV 从etcd解析node信息
func GetNodeFromKV(kv *mvccpb.KeyValue) *HostNode {
var node HostNode
if err := ffjson.Unmarshal(kv.Value, &node); err != nil {
logrus.Error("parse node info error:", err.Error())
return nil
}
return &node
}
//UpdataK8sCondition 更新k8s节点的状态到rainbond节点
func (n *HostNode) UpdataK8sCondition(conditions []v1.NodeCondition) {
for _, con := range conditions {
rbcon := NodeCondition{
Type: NodeConditionType(con.Type),
Status: ConditionStatus(con.Status),
LastHeartbeatTime: con.LastHeartbeatTime.Time,
LastTransitionTime: con.LastTransitionTime.Time,
Reason: con.Reason,
Message: con.Message,
}
n.UpdataCondition(rbcon)
}
}
//DeleteCondition DeleteCondition
func (n *HostNode) DeleteCondition(types ...NodeConditionType) {
if n.NodeStatus == nil {
return
}
for _, t := range types {
for i, c := range n.NodeStatus.Conditions {
if c.Type.Compare(t) {
n.NodeStatus.Conditions = append(n.NodeStatus.Conditions[:i], n.NodeStatus.Conditions[i+1:]...)
break
}
}
}
}
//UpdataCondition 更新状态
func (n *HostNode) UpdataCondition(conditions ...NodeCondition) {
if n.NodeStatus == nil {
n.NodeStatus = &NodeStatus{}
}
for _, newcon := range conditions {
var update bool
if n.NodeStatus.Conditions != nil {
for i, con := range n.NodeStatus.Conditions {
if con.Type.Compare(newcon.Type) {
n.NodeStatus.Conditions[i] = newcon
update = true
break
}
}
}
if !update {
n.NodeStatus.Conditions = append(n.NodeStatus.Conditions, newcon)
}
}
}
//HostRule 节点角色
type HostRule []string
//ComputeNode 计算节点
var ComputeNode = "compute"
//ManageNode 管理节点
var ManageNode = "manage"
//StorageNode 存储节点
var StorageNode = "storage"
//LBNode 边缘负载均衡节点
var LBNode = "lb"
//HasRule 是否具有什么角色
func (h HostRule) HasRule(rule string) bool {
for _, v := range h {
if v == rule {
return true
}
}
return false
}
func (h HostRule) String() string {
return strings.Join(h, ",")
}
//NodeConditionType NodeConditionType
type NodeConditionType string
// These are valid conditions of node.
const (
// NodeReady means this node is working
NodeReady NodeConditionType = "Ready"
// InstallNotReady means the installation task was not completed in this node.
InstallNotReady NodeConditionType = "InstallNotReady"
// NodeInit means node already install rainbond node and regist
NodeInit NodeConditionType = "NodeInit"
OutOfDisk NodeConditionType = "OutOfDisk"
MemoryPressure NodeConditionType = "MemoryPressure"
DiskPressure NodeConditionType = "DiskPressure"
)
//Compare 比较
func (nt NodeConditionType) Compare(ent NodeConditionType) bool {
return string(nt) == string(ent)
}
//ConditionStatus ConditionStatus
type ConditionStatus string
// These are valid condition statuses. "ConditionTrue" means a resource is in the condition.
// "ConditionFalse" means a resource is not in the condition. "ConditionUnknown" means kubernetes
// can't decide if a resource is in the condition or not. In the future, we could add other
// intermediate conditions, e.g. ConditionDegraded.
const (
ConditionTrue ConditionStatus = "True"
ConditionFalse ConditionStatus = "False"
ConditionUnknown ConditionStatus = "Unknown"
)
// NodeCondition contains condition information for a node.
type NodeCondition struct {
// Type of node condition.
Type NodeConditionType `json:"type" `
// Status of the condition, one of True, False, Unknown.
Status ConditionStatus `json:"status" `
// Last time we got an update on a given condition.
// +optional
LastHeartbeatTime time.Time `json:"lastHeartbeatTime,omitempty" `
// Last time the condition transit from one status to another.
// +optional
LastTransitionTime time.Time `json:"lastTransitionTime,omitempty" `
// (brief) reason for the condition's last transition.
// +optional
Reason string `json:"reason,omitempty"`
// Human readable message indicating details about last transition.
// +optional
Message string `json:"message,omitempty"`
}
// ClusterNode 集群节点实体
type ClusterNode struct {
PID string `json:"pid"` // 进程 pid
Version string `json:"version"`
UpTime time.Time `json:"up"` // 启动时间
DownTime time.Time `json:"down"` // 上次关闭时间
Alived bool `json:"alived"` // 是否可用
Connected bool `json:"connected"` // 当 Alived 为 true 时有效,表示心跳是否正常
}
//String string
func (h *HostNode) String() string {
res, _ := ffjson.Marshal(h)
return string(res)
}
//Put 节点上线更新
func (h *HostNode) Put(opts ...client.OpOption) (*client.PutResponse, error) {
return store.DefalutClient.Put(conf.Config.OnlineNodePath+"/"+h.ID, h.PID, opts...)
}
//Update 更新节点信息,由节点启动时调用
func (h *HostNode) Update() (*client.PutResponse, error) {
return store.DefalutClient.Put(conf.Config.NodePath+"/"+h.ID, h.String())
}
//DeleteNode 删除节点
func (h *HostNode) DeleteNode() (*client.DeleteResponse, error) {
return store.DefalutClient.Delete(conf.Config.NodePath + "/" + h.ID)
}
//Del 删除
func (h *HostNode) Del() (*client.DeleteResponse, error) {
return store.DefalutClient.Delete(conf.Config.OnlineNodePath + h.ID)
}
//GetNodes 获取节点
func GetNodes() (nodes []*HostNode, err error) {
return nil, nil
}
// Down 节点下线
func (h *HostNode) Down() {
h.Alived, h.DownTime = false, time.Now()
h.Update()
}

View File

@ -0,0 +1,25 @@
// RAINBOND, Application Management Platform
// Copyright (C) 2014-2017 Goodrain Co., Ltd.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version. For any non-GPL usage of Rainbond,
// one or multiple Commercial Licenses authorized by Goodrain Co., Ltd.
// must be obtained first.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
package controller
//Manager Manager
type Manager interface {
Start(errchan chan error)
Stop() error
}

View File

@ -0,0 +1,27 @@
// RAINBOND, Application Management Platform
// Copyright (C) 2014-2017 Goodrain Co., Ltd.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version. For any non-GPL usage of Rainbond,
// one or multiple Commercial Licenses authorized by Goodrain Co., Ltd.
// must be obtained first.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
package controller
import "github.com/goodrain/rainbond/node/nodem/healthy"
//Service Service
type Service struct {
Name string
ServiceHealth *healthy.ServiceHealth
}

View File

@ -0,0 +1,31 @@
// RAINBOND, Application Management Platform
// Copyright (C) 2014-2017 Goodrain Co., Ltd.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version. For any non-GPL usage of Rainbond,
// one or multiple Commercial Licenses authorized by Goodrain Co., Ltd.
// must be obtained first.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
package healthy
//Manager Manager
type Manager interface {
GetServiceHeadthy(serviceName string) *ServiceHealth
WatchServiceHeadthy() <-chan *ServiceHealth
Start(errchan chan error)
Stop() error
}
//ServiceHealth ServiceHealth
type ServiceHealth struct {
}

View File

@ -0,0 +1,78 @@
// RAINBOND, Application Management Platform
// Copyright (C) 2014-2017 Goodrain Co., Ltd.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version. For any non-GPL usage of Rainbond,
// one or multiple Commercial Licenses authorized by Goodrain Co., Ltd.
// must be obtained first.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
// +build linux
package info
import (
"bufio"
"exec"
"io/ioutil"
"os"
"runtime"
"strings"
"syscall"
"github.com/goodrain/rainbond/node/nodem/client"
)
//GetSystemInfo GetSystemInfo
func GetSystemInfo() (info client.NodeSystemInfo) {
info.Architecture = runtime.GOARCH
b, _ := ioutil.ReadFile("/etc/machine-id")
info.MachineID = string(b)
output, _ := exec.Command("uname", "-r").Output()
info.KernelVersion = string(output)
osInfo := readOS()
if name, ok := osInfo["NAME"]; ok {
info.OSImage = name
}
info.OperatingSystem = runtime.GOOS
info.MemorySize, _ = getMemory()
return info
}
func readOS() map[string]string {
f, err := os.Open("/etc/os-release")
if err != nil {
return nil
}
defer f.Close()
var info = make(map[string]string)
r := bufio.NewReader(f)
for {
line, _, err := r.ReadLine()
if err != nil {
return info
}
lines := strings.Split(line, "=")
if len(lines) >= 2 {
info[lines[0]] = lines[1]
}
}
}
func getMemory() (total uint32, free uint32) {
sysInfo := new(syscall.Sysinfo_t)
err := syscall.Sysinfo(sysInfo)
if err == nil {
return sysInfo.Totalram * uint32(syscall.Getpagesize()), sysInfo.Freeram * uint32(syscall.Getpagesize())
}
return 0, 0
}

View File

@ -0,0 +1,40 @@
// RAINBOND, Application Management Platform
// Copyright (C) 2014-2017 Goodrain Co., Ltd.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version. For any non-GPL usage of Rainbond,
// one or multiple Commercial Licenses authorized by Goodrain Co., Ltd.
// must be obtained first.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
// +build !linux
package info
import (
"runtime"
"github.com/goodrain/rainbond/node/nodem/client"
)
//GetSystemInfo GetSystemInfo
func GetSystemInfo() (info client.NodeSystemInfo) {
info.Architecture = runtime.GOARCH
info.MachineID = "do not read machineID"
info.OSImage = runtime.GOOS
info.OperatingSystem = runtime.GOOS
return info
}
func getMemory() (total uint32, free uint32) {
return 0, 0
}

View File

@ -0,0 +1,25 @@
// RAINBOND, Application Management Platform
// Copyright (C) 2014-2017 Goodrain Co., Ltd.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version. For any non-GPL usage of Rainbond,
// one or multiple Commercial Licenses authorized by Goodrain Co., Ltd.
// must be obtained first.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
package info
import "testing"
func TestGetSysteminfo(t *testing.T) {
t.Log(getSystemInfo())
}

View File

@ -0,0 +1,37 @@
// RAINBOND, Application Management Platform
// Copyright (C) 2014-2017 Goodrain Co., Ltd.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version. For any non-GPL usage of Rainbond,
// one or multiple Commercial Licenses authorized by Goodrain Co., Ltd.
// must be obtained first.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
package monitor
import (
"github.com/goodrain/rainbond/node/statsd"
)
//Manager Manager
type Manager interface {
Start(errchan chan error)
Stop() error
}
type manager struct {
statsdExporter *statsd.Exporter
nodeExporter *nodeExporter
}
type nodeExporter struct {
}

165
node/nodem/node_manager.go Normal file
View File

@ -0,0 +1,165 @@
// RAINBOND, Application Management Platform
// Copyright (C) 2014-2017 Goodrain Co., Ltd.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version. For any non-GPL usage of Rainbond,
// one or multiple Commercial Licenses authorized by Goodrain Co., Ltd.
// must be obtained first.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
package nodem
import (
"context"
"fmt"
"os"
"runtime"
"strconv"
"strings"
"time"
"github.com/Sirupsen/logrus"
"github.com/goodrain/rainbond/cmd/node/option"
"github.com/goodrain/rainbond/node/nodem/client"
"github.com/goodrain/rainbond/node/nodem/controller"
"github.com/goodrain/rainbond/node/nodem/healthy"
"github.com/goodrain/rainbond/node/nodem/info"
"github.com/goodrain/rainbond/node/nodem/monitor"
"github.com/goodrain/rainbond/node/nodem/taskrun"
"github.com/goodrain/rainbond/util"
)
//NodeManager node manager
type NodeManager struct {
client.HostNode
ctx context.Context
cancel context.CancelFunc
cluster client.ClusterClient
monitor *monitor.Manager
healthy *healthy.Manager
controller *controller.Manager
taskrun *taskrun.Manager
cfg *option.Conf
}
//NewNodeManager new a node manager
func NewNodeManager() *NodeManager {
return &NodeManager{}
}
//Start start
func (n *NodeManager) Start(errchan chan error) {
if err := n.init(); err != nil {
errchan <- err
return
}
go n.heartbeat()
}
//checkNodeHealthy check current node healthy.
//only healthy can controller other service start
func (n *NodeManager) checkNodeHealthy() error {
return nil
}
func (n *NodeManager) heartbeat() {
util.Exec(n.ctx, func() error {
if err := n.cluster.UpdateStatus(&n.HostNode); err != nil {
logrus.Errorf("update node status error %s", err.Error())
}
return nil
}, time.Second*time.Duration(n.cfg.TTL))
}
//init node init
func (n *NodeManager) init() error {
uid, err := util.ReadHostID(n.cfg.HostIDFile)
if err != nil {
return fmt.Errorf("Get host id error:%s", err.Error())
}
node, err := n.cluster.GetNode(uid)
if err != nil {
return err
}
if node == nil {
node, err = n.getCurrentNode(uid)
if err != nil {
return err
}
}
node.NodeStatus.NodeInfo = info.GetSystemInfo()
node.Role = strings.Split(n.cfg.NodeRule, ",")
if node.Labels == nil || len(node.Labels) < 1 {
node.Labels = map[string]string{}
}
for _, rule := range node.Role {
node.Labels["rainbond_node_rule_"+rule] = "true"
}
if node.HostName == "" {
hostname, _ := os.Hostname()
node.HostName = hostname
}
if node.ClusterNode.PID == "" {
node.ClusterNode.PID = strconv.Itoa(os.Getpid())
}
node.Labels["rainbond_node_hostname"] = node.HostName
node.Labels["rainbond_node_ip"] = node.InternalIP
node.UpdataCondition(client.NodeCondition{
Type: client.NodeInit,
Status: client.ConditionTrue,
LastHeartbeatTime: time.Now(),
LastTransitionTime: time.Now(),
})
node.Mode = n.cfg.RunMode
n.HostNode = *node
if node.AvailableMemory == 0 {
node.AvailableMemory = int64(node.NodeStatus.NodeInfo.MemorySize)
}
if node.AvailableCPU == 0 {
node.AvailableCPU = int64(runtime.NumCPU()) * 1000
}
return nil
}
//UpdateNodeStatus UpdateNodeStatus
func (n *NodeManager) UpdateNodeStatus() error {
return n.cluster.UpdateStatus(&n.HostNode)
}
//getCurrentNode get current node info
func (n *NodeManager) getCurrentNode(uid string) (*client.HostNode, error) {
if n.cfg.HostIP == "" {
ip, err := util.LocalIP()
if err != nil {
return nil, err
}
n.cfg.HostIP = ip.String()
}
node := CreateNode(uid, n.cfg.HostIP)
return &node, nil
}
//CreateNode new node
func CreateNode(nodeID, ip string) client.HostNode {
HostNode := client.HostNode{
ID: nodeID,
ClusterNode: client.ClusterNode{
PID: strconv.Itoa(os.Getpid()),
},
InternalIP: ip,
ExternalIP: ip,
CreateTime: time.Now(),
NodeStatus: &client.NodeStatus{},
}
return HostNode
}

View File

@ -0,0 +1,235 @@
// Copyright (C) 2014-2018 Goodrain Co., Ltd.
// RAINBOND, Application Management Platform
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version. For any non-GPL usage of Rainbond,
// one or multiple Commercial Licenses authorized by Goodrain Co., Ltd.
// must be obtained first.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
package taskrun
import (
"github.com/goodrain/rainbond/node/core/job"
)
//Manager Manager
type Manager interface {
Start(errchan chan error)
Stop() error
}
// //Config config server
// type Config struct {
// EtcdEndPoints []string
// EtcdTimeout int
// EtcdPrefix string
// ClusterName string
// APIAddr string
// K8SConfPath string
// EventServerAddress []string
// PrometheusMetricPath string
// TTL int64
// }
//Jobs jobs
type Jobs map[string]*job.Job
// //manager node manager server
// type manager struct {
// cluster client.ClusterClient
// *cron.Cron
// ctx context.Context
// jobs Jobs // 和结点相关的任务
// onceJobs Jobs //记录执行的单任务
// jobLock sync.Mutex
// cmds map[string]*corejob.Cmd
// delIDs map[string]bool
// ttl int64
// }
// //Run taskrun start
// func (n *manager) Start(errchan chan error) {
// go n.watchJobs(errchan)
// n.Cron.Start()
// if err := corejob.StartProc(); err != nil {
// logrus.Warnf("[process key will not timeout]proc lease id set err: %s", err.Error())
// }
// return
// }
// func (n *manager) watchJobs(errChan chan error) error {
// watcher := watch.New(store.DefalutClient.Client, "")
// watchChan, err := watcher.WatchList(n.ctx, n.Conf.JobPath, "")
// if err != nil {
// errChan <- err
// return err
// }
// defer watchChan.Stop()
// for event := range watchChan.ResultChan() {
// switch event.Type {
// case watch.Added:
// j := new(job.Job)
// err := j.Decode(event.GetValue())
// if err != nil {
// logrus.Errorf("decode job error :%s", err)
// continue
// }
// n.addJob(j)
// case watch.Modified:
// j := new(job.Job)
// err := j.Decode(event.GetValue())
// if err != nil {
// logrus.Errorf("decode job error :%s", err)
// continue
// }
// n.modJob(j)
// case watch.Deleted:
// n.delJob(event.GetKey())
// default:
// logrus.Errorf("watch job error:%v", event.Error)
// errChan <- event.Error
// }
// }
// return nil
// }
// //添加job缓存
// func (n *manager) addJob(j *corejob.Job) {
// if !j.IsRunOn(n.HostNode) {
// return
// }
// //一次性任务
// if j.Rules.Mode != corejob.Cycle {
// n.runOnceJob(j)
// return
// }
// n.jobLock.Lock()
// defer n.jobLock.Unlock()
// n.jobs[j.ID] = j
// cmds := j.Cmds(n.HostNode)
// if len(cmds) == 0 {
// return
// }
// for _, cmd := range cmds {
// n.addCmd(cmd)
// }
// return
// }
// func (n *manager) delJob(id string) {
// n.jobLock.Lock()
// defer n.jobLock.Unlock()
// n.delIDs[id] = true
// job, ok := n.jobs[id]
// // 之前此任务没有在当前结点执行
// if !ok {
// return
// }
// cmds := job.Cmds(n.HostNode)
// if len(cmds) == 0 {
// return
// }
// for _, cmd := range cmds {
// n.delCmd(cmd)
// }
// delete(n.jobs, id)
// return
// }
// func (n *manager) modJob(job *corejob.Job) {
// if !job.IsRunOn(n.HostNode) {
// return
// }
// //一次性任务
// if job.Rules.Mode != corejob.Cycle {
// n.runOnceJob(job)
// return
// }
// oJob, ok := n.jobs[job.ID]
// // 之前此任务没有在当前结点执行,直接增加任务
// if !ok {
// n.addJob(job)
// return
// }
// prevCmds := oJob.Cmds(n.HostNode)
// job.Count = oJob.Count
// *oJob = *job
// cmds := oJob.Cmds(n.HostNode)
// for id, cmd := range cmds {
// n.modCmd(cmd)
// delete(prevCmds, id)
// }
// for _, cmd := range prevCmds {
// n.delCmd(cmd)
// }
// }
// func (n *manager) addCmd(cmd *corejob.Cmd) {
// n.Cron.Schedule(cmd.Rule.Schedule, cmd)
// n.cmds[cmd.GetID()] = cmd
// logrus.Infof("job[%s] rule[%s] timer[%s] has added", cmd.Job.ID, cmd.Rule.ID, cmd.Rule.Timer)
// return
// }
// func (n *manager) modCmd(cmd *corejob.Cmd) {
// c, ok := n.cmds[cmd.GetID()]
// if !ok {
// n.addCmd(cmd)
// return
// }
// sch := c.Rule.Timer
// *c = *cmd
// // 节点执行时间改变,更新 cron
// // 否则不用更新 cron
// if c.Rule.Timer != sch {
// n.Cron.Schedule(c.Rule.Schedule, c)
// }
// logrus.Infof("job[%s] rule[%s] timer[%s] has updated", c.Job.ID, c.Rule.ID, c.Rule.Timer)
// }
// func (n *manager) delCmd(cmd *corejob.Cmd) {
// delete(n.cmds, cmd.GetID())
// n.Cron.DelJob(cmd)
// logrus.Infof("job[%s] rule[%s] timer[%s] has deleted", cmd.Job.ID, cmd.Rule.ID, cmd.Rule.Timer)
// }
// //job must be schedulered
// func (n *manager) runOnceJob(j *corejob.Job) {
// go j.RunWithRecovery()
// }
// //Stop 停止服务
// func (n *manager) Stop(i interface{}) {
// n.Cron.Stop()
// }
// //Newmanager new server
// func Newmanager(cfg *conf.Conf) (*manager, error) {
// currentNode, err := GetCurrentNode(cfg)
// if err != nil {
// return nil, err
// }
// if cfg.TTL == 0 {
// cfg.TTL = 10
// }
// n := &manager{
// Cron: cron.New(),
// jobs: make(Jobs, 8),
// onceJobs: make(Jobs, 8),
// cmds: make(map[string]*corejob.Cmd),
// delIDs: make(map[string]bool, 8),
// ttl: cfg.TTL,
// }
// return n, nil
// }

227
node/nodem/taskrun/task.go Normal file
View File

@ -0,0 +1,227 @@
// Copyright (C) 2014-2018 Goodrain Co., Ltd.
// RAINBOND, Application Management Platform
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version. For any non-GPL usage of Rainbond,
// one or multiple Commercial Licenses authorized by Goodrain Co., Ltd.
// must be obtained first.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
package taskrun
import (
"time"
"github.com/pquerna/ffjson/ffjson"
)
//Shell 执行脚本配置
type Shell struct {
Cmd []string `json:"cmd"`
}
//TaskTemp 任务模版
type TaskTemp struct {
Name string `json:"name" validate:"name|required"`
ID string `json:"id" validate:"id|uuid"`
Shell Shell `json:"shell"`
Envs map[string]string `json:"envs,omitempty"`
Input string `json:"input,omitempty"`
Args []string `json:"args,omitempty"`
Depends []DependStrategy `json:"depends,omitempty"`
Timeout int `json:"timeout" validate:"timeout|required|numeric"`
CreateTime time.Time `json:"create_time"`
Labels map[string]string `json:"labels,omitempty"`
}
//DependStrategy 依赖策略
type DependStrategy struct {
DependTaskID string `json:"depend_task_id"`
DetermineStrategy string `json:"strategy"`
}
//AtLeastOnceStrategy 至少已执行一次
var AtLeastOnceStrategy = "AtLeastOnce"
//SameNodeStrategy 相同节点已执行
var SameNodeStrategy = "SameNode"
func (t TaskTemp) String() string {
res, _ := ffjson.Marshal(&t)
return string(res)
}
//Task 任务
type Task struct {
Name string `json:"name" validate:"name|required"`
ID string `json:"id" validate:"id|uuid"`
TempID string `json:"temp_id,omitempty" validate:"temp_id|uuid"`
Temp *TaskTemp `json:"temp,omitempty"`
GroupID string `json:"group_id,omitempty"`
//执行的节点
Nodes []string `json:"nodes"`
//执行时间定义
//例如每30分钟执行一次:@every 30m
Timer string `json:"timer"`
TimeOut int64 `json:"time_out"`
// 执行任务失败重试次数
// 默认为 0不重试
Retry int `json:"retry"`
// 执行任务失败重试时间间隔
// 单位秒,如果不大于 0 则马上重试
Interval int `json:"interval"`
//ExecCount 执行次数
ExecCount int `json:"exec_count"`
//每个执行节点执行状态
Status map[string]TaskStatus `json:"status,omitempty"`
Scheduler Scheduler `json:"scheduler"`
CreateTime time.Time `json:"create_time"`
StartTime time.Time `json:"start_time"`
CompleteTime time.Time `json:"complete_time"`
ResultPath string `json:"result_path"`
EventID string `json:"event_id"`
RunMode string `json:"run_mode"`
OutPut []*TaskOutPut `json:"out_put"`
}
func (t Task) String() string {
res, _ := ffjson.Marshal(&t)
return string(res)
}
//Decode Decode
func (t *Task) Decode(data []byte) error {
return ffjson.Unmarshal(data, t)
}
//UpdataOutPut 更新状态
func (t *Task) UpdataOutPut(output TaskOutPut) {
updateIndex := -1
for i, oldOut := range t.OutPut {
if oldOut.NodeID == output.NodeID {
updateIndex = i
break
}
}
if updateIndex != -1 {
t.OutPut[updateIndex] = &output
return
}
t.OutPut = append(t.OutPut, &output)
}
//CanBeDelete 能否被删除
func (t Task) CanBeDelete() bool {
if t.Status == nil || len(t.Status) == 0 {
return true
}
for _, v := range t.Status {
if v.Status != "create" {
return false
}
}
return true
}
//Scheduler 调度状态
type Scheduler struct {
Mode string `json:"mode"` //立即调度Intime触发调度Passive
Status map[string]SchedulerStatus `json:"status"`
}
//SchedulerStatus 调度状态
type SchedulerStatus struct {
Status string `json:"status"`
Message string `json:"message"`
SchedulerTime time.Time `json:"scheduler_time"` //调度时间
SchedulerMaster string `json:"scheduler_master"` //调度的管理节点
}
//TaskOutPut 任务输出
type TaskOutPut struct {
NodeID string `json:"node_id"`
JobID string `json:"job_id"`
Global map[string]string `json:"global"`
Inner map[string]string `json:"inner"`
//返回数据类型,检测结果类(check) 执行安装类 (install) 普通类 (common)
Type string `json:"type"`
Status []TaskOutPutStatus `json:"status"`
ExecStatus string `json:"exec_status"`
Body string `json:"body"`
}
//ParseTaskOutPut json parse
func ParseTaskOutPut(body string) (t TaskOutPut, err error) {
t.Body = body
err = ffjson.Unmarshal([]byte(body), &t)
return
}
//TaskOutPutStatus 输出数据
type TaskOutPutStatus struct {
Name string `json:"name"`
//节点属性
ConditionType string `json:"condition_type"`
//节点属性值
ConditionStatus string `json:"condition_status"`
NextTask []string `json:"next_tasks,omitempty"`
NextGroups []string `json:"next_groups,omitempty"`
}
//TaskStatus 任务状态
type TaskStatus struct {
JobID string `json:"job_id"`
Status string `json:"status"` //执行状态create init exec complete timeout
StartTime time.Time `json:"start_time"`
EndTime time.Time `json:"end_time"`
TakeTime int `json:"take_time"`
CompleStatus string `json:"comple_status"`
//脚本退出码
ShellCode int `json:"shell_code"`
Message string `json:"message,omitempty"`
}
//TaskGroup 任务组
type TaskGroup struct {
Name string `json:"name" validate:"name|required"`
ID string `json:"id" validate:"id|uuid"`
Tasks []*Task `json:"tasks"`
CreateTime time.Time `json:"create_time"`
Status *TaskGroupStatus `json:"status"`
}
func (t TaskGroup) String() string {
res, _ := ffjson.Marshal(&t)
return string(res)
}
//CanBeDelete 是否能被删除
func (t TaskGroup) CanBeDelete() bool {
if t.Status == nil || len(t.Status.TaskStatus) == 0 {
return true
}
for _, v := range t.Status.TaskStatus {
if v.Status != "create" {
return false
}
}
return true
}
//TaskGroupStatus 任务组状态
type TaskGroupStatus struct {
TaskStatus map[string]TaskStatus `json:"task_status"`
InitTime time.Time `json:"init_time"`
StartTime time.Time `json:"start_time"`
EndTime time.Time `json:"end_time"`
Status string `json:"status"` //create init exec complete timeout
}

View File

@ -20,6 +20,7 @@ package util
import (
"archive/zip"
"crypto/md5"
"fmt"
"io"
"io/ioutil"
@ -34,7 +35,6 @@ import (
"time"
"github.com/Sirupsen/logrus"
"github.com/twinj/uuid"
)
//CheckAndCreateDir check and create dir
@ -177,7 +177,10 @@ func ReadHostID(filePath string) (string, error) {
_, err := os.Stat(filePath)
if err != nil {
if strings.HasSuffix(err.Error(), "no such file or directory") {
uid := uuid.NewV4().String()
uid, err := CreateHostID()
if err != nil {
return "", err
}
err = ioutil.WriteFile(filePath, []byte("host_uuid="+uid), 0777)
if err != nil {
logrus.Error("Write host_uuid file error.", err.Error())
@ -197,6 +200,42 @@ func ReadHostID(filePath string) (string, error) {
return "", fmt.Errorf("Invalid host uuid from file")
}
//CreateHostID create host id by mac addr
func CreateHostID() (string, error) {
macAddrs := getMacAddrs()
if macAddrs == nil || len(macAddrs) == 0 {
return "", fmt.Errorf("read macaddr error when create node id")
}
ip, _ := LocalIP()
hash := md5.New()
hash.Write([]byte(macAddrs[0] + ip.String()))
uid := fmt.Sprintf("%x", hash.Sum(nil))
if len(uid) >= 32 {
return uid[:32], nil
}
for i := len(uid); i < 32; i++ {
uid = uid + "0"
}
return uid, nil
}
func getMacAddrs() (macAddrs []string) {
netInterfaces, err := net.Interfaces()
if err != nil {
fmt.Printf("fail to get net interfaces: %v", err)
return macAddrs
}
for _, netInterface := range netInterfaces {
macAddr := netInterface.HardwareAddr.String()
if len(macAddr) == 0 {
continue
}
macAddrs = append(macAddrs, macAddr)
}
return macAddrs
}
//LocalIP 获取本机 ip
// 获取第一个非 loopback ip
func LocalIP() (net.IP, error) {
@ -368,7 +407,7 @@ func listDirNonSymlink(dir string) []os.FileInfo {
var result []os.FileInfo
for i := range entries {
if entries[i].Mode() & os.ModeSymlink == 0 {
if entries[i].Mode()&os.ModeSymlink == 0 {
result = append(result, entries[i])
}
}

View File

@ -82,3 +82,11 @@ func TestMergeDir(t *testing.T) {
t.Fatal(err)
}
}
func TestCreateHostID(t *testing.T) {
uid, err := CreateHostID()
if err != nil {
t.Fatal(err)
}
t.Log(uid)
}