// RAINBOND, Application Management Platform
// Copyright (C) 2014-2017 Goodrain Co., Ltd.

// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version. For any non-GPL usage of Rainbond,
// one or multiple Commercial Licenses authorized by Goodrain Co., Ltd.
// must be obtained first.

// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.

// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.

// Package masterserver handles the management of cluster nodes.
package masterserver

import (
	"bytes"
	"context"
	"fmt"
	"strings"
	"sync"
	"time"

	"github.com/Sirupsen/logrus"
	"github.com/pquerna/ffjson/ffjson"

	"github.com/coreos/etcd/mvcc/mvccpb"

	"k8s.io/client-go/kubernetes"
	"k8s.io/client-go/pkg/api/v1"

	client "github.com/coreos/etcd/clientv3"
	"github.com/goodrain/rainbond/cmd/node/option"
	"github.com/goodrain/rainbond/pkg/node/api/model"
	"github.com/goodrain/rainbond/pkg/node/core/store"
	"github.com/goodrain/rainbond/pkg/util"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/watch"
)

//NodeCluster is the cluster node manager
type NodeCluster struct {
	ctx          context.Context
	cancel       context.CancelFunc
	nodes        map[string]*model.HostNode
	lock         sync.Mutex
	client       *store.Client
	k8sClient    *kubernetes.Clientset
	currentNode  *model.HostNode
	checkInstall chan *model.HostNode
}

//CreateNodeCluster creates the node manager
func CreateNodeCluster(k8sClient *kubernetes.Clientset, node *model.HostNode) (*NodeCluster, error) {
	ctx, cancel := context.WithCancel(context.Background())
	nc := NodeCluster{
		ctx:          ctx,
		cancel:       cancel,
		nodes:        make(map[string]*model.HostNode, 5),
		client:       store.DefalutClient,
		k8sClient:    k8sClient,
		currentNode:  node,
		checkInstall: make(chan *model.HostNode, 4),
	}
	if err := nc.loadNodes(); err != nil {
		return nil, err
	}
	return &nc, nil
}

//Start starts the node manager
func (n *NodeCluster) Start() {
	go n.watchNodes()
	go n.watchK8sNodes()
	go n.worker()
}

//Stop stops the node manager
func (n *NodeCluster) Stop(i interface{}) {
	n.cancel()
}

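//loadNodes initializes the node cache: it reads all node records and their
//online markers from etcd, then starts a background loop that retries until
//the k8s node status has been merged into the cached rainbond nodes.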
func (n *NodeCluster) loadNodes() error {
	//load node information
	res, err := n.client.Get(option.Config.NodePath, client.WithPrefix())
	if err != nil {
		return fmt.Errorf("load cluster nodes error:%s", err.Error())
	}
	for _, kv := range res.Kvs {
		if node := n.getNodeFromKV(kv); node != nil {
			n.CacheNode(node)
		}
	}
	//load online status of rainbond nodes
	res, err = n.client.Get(option.Config.OnlineNodePath, client.WithPrefix())
	if err != nil {
		return fmt.Errorf("load cluster nodes error:%s", err.Error())
	}
	for _, kv := range res.Kvs {
		if node := n.getNodeFromKey(string(kv.Key)); node != nil {
			if !node.Alived {
				node.Alived = true
				node.UpTime = time.Now()
			}
		}
	}
	//load k8s node information
	go func() {
		for {
			list, err := n.k8sClient.Core().Nodes().List(metav1.ListOptions{})
			if err != nil {
				logrus.Warnf("load k8s nodes from k8s api error:%s", err.Error())
				time.Sleep(time.Second * 3)
				continue
			}
			for _, node := range list.Items {
				node := node //copy the loop variable so the cached status pointer stays valid
				if cn, ok := n.nodes[node.Name]; ok {
					cn.NodeStatus = &node.Status
					cn.UpdataK8sCondition(node.Status.Conditions)
					n.UpdateNode(cn)
				} else {
					logrus.Warningf("k8s node %s does not exist in rainbond cluster.", node.Name)
				}
			}
			return
		}
	}()
	return nil
}

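//worker runs background tasks for the cluster: it dispatches install checks
//received on the checkInstall channel until the context is canceled.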
func (n *NodeCluster) worker() {
	for {
		select {
		case newNode := <-n.checkInstall:
			go n.checkNodeInstall(newNode)
			//other asynchronous tasks
		case <-n.ctx.Done():
			return
		}
	}
}

//UpdateNode updates node information
func (n *NodeCluster) UpdateNode(node *model.HostNode) {
	n.lock.Lock()
	defer n.lock.Unlock()
	n.nodes[node.ID] = node
	n.client.Put(option.Config.NodePath+"/"+node.ID, node.String())
}

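//getNodeFromKV decodes a HostNode from an etcd key-value pair.
//It returns nil if the stored value cannot be unmarshalled.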
func (n *NodeCluster) getNodeFromKV(kv *mvccpb.KeyValue) *model.HostNode {
	var node model.HostNode
	if err := ffjson.Unmarshal(kv.Value, &node); err != nil {
		logrus.Error("parse node info error:", err.Error())
		return nil
	}
	return &node
}

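//getNodeFromKey extracts the node ID from the last segment of an etcd key
//and looks the node up in the cache.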
func (n *NodeCluster) getNodeFromKey(key string) *model.HostNode {
	index := strings.LastIndex(key, "/")
	if index < 0 {
		return nil
	}
	id := key[index+1:]
	return n.GetNode(id)
}

//GetNode gets node information from the cache
func (n *NodeCluster) GetNode(id string) *model.HostNode {
	n.lock.Lock()
	defer n.lock.Unlock()
	if node, ok := n.nodes[id]; ok {
		return node
	}
	return nil
}

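//watchNodes watches the etcd node and online-node prefixes and keeps the
//in-memory cache in sync: node records are cached or removed, and online
//markers toggle the Alived flag together with the up/down time.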
func (n *NodeCluster) watchNodes() {
	ch := n.client.Watch(option.Config.NodePath, client.WithPrefix())
	onlineCh := n.client.Watch(option.Config.OnlineNodePath, client.WithPrefix())
	for {
		select {
		case <-n.ctx.Done():
			return
		case event := <-ch:
			for _, ev := range event.Events {
				switch {
				case ev.IsCreate(), ev.IsModify():
					if node := n.getNodeFromKV(ev.Kv); node != nil {
						n.CacheNode(node)
					}
				case ev.Type == client.EventTypeDelete:
					if node := n.getNodeFromKey(string(ev.Kv.Key)); node != nil {
						n.RemoveNode(node)
					}
				}
			}
		case event := <-onlineCh:
			for _, ev := range event.Events {
				switch {
				case ev.IsCreate(), ev.IsModify():
					if node := n.getNodeFromKey(string(ev.Kv.Key)); node != nil {
						node.Alived = true
						node.UpTime = time.Now()
						n.UpdateNode(node)
					}
				case ev.Type == client.EventTypeDelete:
					if node := n.getNodeFromKey(string(ev.Kv.Key)); node != nil {
						node.Alived = false
						node.DownTime = time.Now()
						n.UpdateNode(node)
					}
				}
			}
		}
	}
}

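//watchK8sNodes watches the kubernetes node API and mirrors node status,
//allocatable resources and schedulability into the matching rainbond nodes.
//The watch is re-established whenever the result channel closes.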
func (n *NodeCluster) watchK8sNodes() {
	for {
		wc, err := n.k8sClient.Core().Nodes().Watch(metav1.ListOptions{})
		if err != nil {
			logrus.Warningf("watch k8s node error: %s", err.Error())
			time.Sleep(time.Second * 5)
			continue
		}
	loop:
		for {
			select {
			case event, ok := <-wc.ResultChan():
				if !ok {
					time.Sleep(time.Second * 3)
					break loop
				}
				switch {
				case event.Type == watch.Added, event.Type == watch.Modified:
					if node, ok := event.Object.(*v1.Node); ok {
						//k8s node name is rainbond node id
						if rbnode := n.GetNode(node.Name); rbnode != nil {
							rbnode.NodeStatus = &node.Status
							rbnode.NodeStatus.Images = nil
							rbnode.UpdataK8sCondition(node.Status.Conditions)
							if rbnode.AvailableCPU == 0 {
								rbnode.AvailableCPU, _ = node.Status.Allocatable.Cpu().AsInt64()
							}
							if rbnode.AvailableMemory == 0 {
								rbnode.AvailableMemory, _ = node.Status.Allocatable.Memory().AsInt64()
							}
							rbnode.Unschedulable = node.Spec.Unschedulable
							n.UpdateNode(rbnode)
						}
					}
				case event.Type == watch.Deleted:
					if node, ok := event.Object.(*v1.Node); ok {
						if rbnode := n.GetNode(node.Name); rbnode != nil {
							rbnode.NodeStatus = nil
							n.UpdateNode(rbnode)
						}
					}
				default:
					logrus.Warning("unknown kube api watch event type when watching nodes.")
				}
			case <-n.ctx.Done():
				wc.Stop()
				return
			}
		}
		//the watch channel closed: stop the old watch before re-establishing it
		wc.Stop()
	}
}

//InstallNode installs a node
func (n *NodeCluster) InstallNode() {

}

//CheckNodeInstall checks whether the rainbond node service is installed on the node.
//If it is not installed, an installation attempt is queued.
func (n *NodeCluster) CheckNodeInstall(node *model.HostNode) {
	n.checkInstall <- node
}

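//checkNodeInstall initializes a node over SSH by running the remote prepare script.
//Failures are recorded on the node as a failed NodeInit condition.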
func (n *NodeCluster) checkNodeInstall(node *model.HostNode) {
	initCondition := model.NodeCondition{
		Type: model.NodeInit,
	}
	defer func() {
		node.UpdataCondition(initCondition)
		n.UpdateNode(node)
	}()
	node.Status = "init"
	errorCondition := func(reason string, err error) {
		initCondition.Status = model.ConditionFalse
		initCondition.LastTransitionTime = time.Now()
		initCondition.LastHeartbeatTime = time.Now()
		initCondition.Reason = reason
		if err != nil {
			initCondition.Message = err.Error()
		}
		node.Conditions = append(node.Conditions, initCondition)
		node.Status = "init_failed"
	}
	var stdout bytes.Buffer
	var stderr bytes.Buffer
	if node.Role == nil {
		node.Role = []string{"compute"}
	}
	role := node.Role[0]

	etcd := n.currentNode.InternalIP
	cmd := "bash -c \"set " + node.ID + " " + etcd + " " + role + ";$(curl -s repo.goodrain.com/gaops/jobs/install/prepare/init.sh)\""
	logrus.Infof("init endpoint node cmd is %s", cmd)
	client := util.NewSSHClient(node.InternalIP, "root", node.RootPass, cmd, 22, &stdout, &stderr)
	if err := client.Connection(); err != nil {
		logrus.Error("init endpoint node error:", err.Error())
		errorCondition("SSH登陆初始化目标节点失败", err)
		return
	}

	//TODO: handle the installation result
	fmt.Println("初始化节点成功")
	logrus.Info(stdout.String())
}

//GetAllNode returns all nodes
func (n *NodeCluster) GetAllNode() (nodes []*model.HostNode) {
	n.lock.Lock()
	defer n.lock.Unlock()
	for _, node := range n.nodes {
		nodes = append(nodes, node)
	}
	return
}

//CacheNode adds a node to the cache
func (n *NodeCluster) CacheNode(node *model.HostNode) {
	n.lock.Lock()
	defer n.lock.Unlock()
	logrus.Debugf("add or update a rainbond node id:%s hostname:%s ip:%s", node.ID, node.HostName, node.InternalIP)
	n.nodes[node.ID] = node
}

//RemoveNode removes a node from the cache
func (n *NodeCluster) RemoveNode(node *model.HostNode) {
	n.lock.Lock()
	defer n.lock.Unlock()
	if _, ok := n.nodes[node.ID]; ok {
		delete(n.nodes, node.ID)
	}
}

//UpdateNodeCondition updates a node condition
func (n *NodeCluster) UpdateNodeCondition(nodeID, ctype, cvalue string) {
	node := n.GetNode(nodeID)
	if node == nil {
		return
	}
	node.UpdataCondition(model.NodeCondition{
		Type:               model.NodeConditionType(ctype),
		Status:             model.ConditionStatus(cvalue),
		LastHeartbeatTime:  time.Now(),
		LastTransitionTime: time.Now(),
		Message:            "",
		Reason:             "",
	})
	n.UpdateNode(node)
}

//GetLabelsNode returns the IDs of nodes matching the given labels
func (n *NodeCluster) GetLabelsNode(labels map[string]string) []string {
	var nodes []string
	for _, node := range n.nodes {
		if checkLables(node, labels) {
			nodes = append(nodes, node.ID)
		}
	}
	return nodes
}

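//checkLables reports whether the node carries every key/value pair in labels.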
func checkLables(node *model.HostNode, labels map[string]string) bool {
	for k, v := range labels {
		if nodev := node.Labels[k]; nodev != v {
			return false
		}
	}
	return true
}