update service condition when heartbeat or get node

This commit is contained in:
凡羊羊 2019-11-15 16:48:21 +08:00
parent 3d16d353df
commit 946a1b81b0
3 changed files with 43 additions and 62 deletions

View File

@ -121,24 +121,10 @@ func (n *Cluster) handleNodeStatus(v *client.HostNode) {
if time.Since(v.NodeStatus.NodeUpdateTime) > time.Minute*1 {
v.Status = client.Unknown
v.NodeStatus.Status = client.Unknown
r := client.NodeCondition{
Type: client.NodeUp,
Status: client.ConditionFalse,
LastHeartbeatTime: time.Now(),
LastTransitionTime: time.Now(),
Message: "Node lost connection, state unknown",
}
v.UpdataCondition(r)
v.GetAndUpdateCondition(client.NodeUp, client.ConditionFalse, "", "Node lost connection, state unknown")
v.NodeStatus.AdviceAction = append(v.NodeStatus.AdviceAction, "offline")
} else {
r := client.NodeCondition{
Type: client.NodeUp,
Status: client.ConditionTrue,
LastHeartbeatTime: time.Now(),
LastTransitionTime: time.Now(),
Message: "Node lost connection, state unknown",
}
v.UpdataCondition(r)
v.GetAndUpdateCondition(client.NodeUp, client.ConditionTrue, "", "")
v.NodeStatus.CurrentScheduleStatus = !v.Unschedulable
if v.Role.HasRule("compute") {
k8sNode, err := n.kubecli.GetNode(v.ID)

View File

@ -30,7 +30,7 @@ import (
"github.com/goodrain/rainbond/node/core/store"
"github.com/goodrain/rainbond/util"
"github.com/pquerna/ffjson/ffjson"
"k8s.io/api/core/v1"
v1 "k8s.io/api/core/v1"
)
//LabelOS node label about os
@ -346,6 +346,31 @@ func (n *HostNode) GetCondition(ctype NodeConditionType) *NodeCondition {
return nil
}
// GetAndUpdateCondition builds a condition of type condType with the given
// status, reason and message, and stores it through UpdataCondition.
//
// LastHeartbeatTime is always the current time. LastTransitionTime is carried
// over from the condition returned by GetCondition(condType) when that
// condition exists and already has the requested status; when no condition is
// found, or its status differs, LastTransitionTime is the current time too.
func (n *HostNode) GetAndUpdateCondition(condType NodeConditionType, status ConditionStatus, reason, message string) {
	prev := n.GetCondition(condType)
	heartbeat := time.Now()

	// Treat this update as a transition unless the previous condition shows
	// the status has not changed.
	transition := heartbeat
	if prev != nil && prev.Status == status {
		transition = prev.LastTransitionTime
	}

	n.UpdataCondition(NodeCondition{
		Type:               condType,
		Status:             status,
		LastHeartbeatTime:  heartbeat,
		LastTransitionTime: transition,
		Reason:             reason,
		Message:            message,
	})
}
//UpdataCondition updates the node's status conditions
func (n *HostNode) UpdataCondition(conditions ...NodeCondition) {
for _, newcon := range conditions {

View File

@ -184,6 +184,9 @@ func (n *NodeManager) heartbeat() {
n.currentNode.NodeStatus.NodeInfo = currentNode.NodeStatus.NodeInfo
for k, v := range allServiceHealth {
if ser := n.controller.GetService(k); ser != nil {
status := client.ConditionTrue
message := ""
reason := ""
if ser.ServiceHealth != nil {
maxNum := ser.ServiceHealth.MaxErrorsNum
if maxNum < 2 {
@ -191,47 +194,19 @@ func (n *NodeManager) heartbeat() {
}
if v.Status != service.Stat_healthy && v.ErrorNumber > maxNum {
allHealth = false
n.currentNode.UpdataCondition(
client.NodeCondition{
Type: client.NodeConditionType(ser.Name),
Status: client.ConditionFalse,
LastHeartbeatTime: time.Now(),
LastTransitionTime: time.Now(),
Message: v.Info,
Reason: "NotHealth",
})
}
if v.Status == service.Stat_healthy {
old := n.currentNode.GetCondition(client.NodeConditionType(ser.Name))
if old == nil || old.Status == client.ConditionFalse {
n.currentNode.UpdataCondition(
client.NodeCondition{
Type: client.NodeConditionType(ser.Name),
Status: client.ConditionTrue,
LastHeartbeatTime: time.Now(),
LastTransitionTime: time.Now(),
Reason: "Health",
})
}
}
if n.cfg.AutoUnschedulerUnHealthDuration == 0 {
continue
}
if v.ErrorDuration > n.cfg.AutoUnschedulerUnHealthDuration && n.cfg.AutoScheduler {
n.currentNode.NodeStatus.AdviceAction = []string{"unscheduler"}
}
} else {
old := n.currentNode.GetCondition(client.NodeConditionType(ser.Name))
if old == nil {
n.currentNode.UpdataCondition(
client.NodeCondition{
Type: client.NodeConditionType(ser.Name),
Status: client.ConditionTrue,
LastHeartbeatTime: time.Now(),
LastTransitionTime: time.Now(),
})
status = client.ConditionFalse
message = v.Info
reason = "NotHealth"
}
}
n.currentNode.GetAndUpdateCondition(client.NodeConditionType(ser.Name), status, reason, message)
if n.cfg.AutoUnschedulerUnHealthDuration == 0 {
continue
}
if v.ErrorDuration > n.cfg.AutoUnschedulerUnHealthDuration && n.cfg.AutoScheduler {
n.currentNode.NodeStatus.AdviceAction = []string{"unscheduler"}
}
}
}
if allHealth && n.cfg.AutoScheduler {
@ -333,12 +308,7 @@ func (n *NodeManager) getCurrentNode(uid string) (*client.HostNode, error) {
node := CreateNode(uid, n.cfg.HostIP)
n.setNodeLabels(&node)
node.NodeStatus.NodeInfo = info.GetSystemInfo()
node.UpdataCondition(client.NodeCondition{
Type: client.NodeInit,
Status: client.ConditionTrue,
LastHeartbeatTime: time.Now(),
LastTransitionTime: time.Now(),
})
node.GetAndUpdateCondition(client.NodeInit, client.ConditionTrue, "", "")
node.Mode = n.cfg.RunMode
node.NodeStatus.Status = "running"
return &node, nil