2022-10-11 11:39:22 +08:00
|
|
|
// Licensed to the LF AI & Data foundation under one
|
|
|
|
// or more contributor license agreements. See the NOTICE file
|
|
|
|
// distributed with this work for additional information
|
|
|
|
// regarding copyright ownership. The ASF licenses this file
|
|
|
|
// to you under the Apache License, Version 2.0 (the
|
|
|
|
// "License"); you may not use this file except in compliance
|
|
|
|
// with the License. You may obtain a copy of the License at
|
|
|
|
//
|
|
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
//
|
|
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
// See the License for the specific language governing permissions and
|
|
|
|
// limitations under the License.
|
|
|
|
|
2022-09-15 18:48:32 +08:00
|
|
|
package session
|
|
|
|
|
2022-11-10 15:01:04 +08:00
|
|
|
import (
|
2022-12-28 10:17:30 +08:00
|
|
|
"fmt"
|
2022-11-10 15:01:04 +08:00
|
|
|
"sync"
|
2022-12-05 15:09:20 +08:00
|
|
|
"time"
|
2022-11-10 15:01:04 +08:00
|
|
|
|
2024-03-01 16:07:37 +08:00
|
|
|
"github.com/blang/semver/v4"
|
2022-12-05 15:09:20 +08:00
|
|
|
"go.uber.org/atomic"
|
2024-03-27 16:15:19 +08:00
|
|
|
"go.uber.org/zap"
|
2023-09-21 09:45:27 +08:00
|
|
|
|
2024-03-27 16:15:19 +08:00
|
|
|
"github.com/milvus-io/milvus/pkg/log"
|
2023-09-21 09:45:27 +08:00
|
|
|
"github.com/milvus-io/milvus/pkg/metrics"
|
2024-03-27 16:15:19 +08:00
|
|
|
"github.com/milvus-io/milvus/pkg/util/merr"
|
2022-11-10 15:01:04 +08:00
|
|
|
)
|
2022-09-15 18:48:32 +08:00
|
|
|
|
|
|
|
type Manager interface {
|
|
|
|
Add(node *NodeInfo)
|
2022-12-06 22:59:19 +08:00
|
|
|
Stopping(nodeID int64)
|
2022-09-15 18:48:32 +08:00
|
|
|
Remove(nodeID int64)
|
|
|
|
Get(nodeID int64) *NodeInfo
|
|
|
|
GetAll() []*NodeInfo
|
2024-03-27 16:15:19 +08:00
|
|
|
|
|
|
|
Suspend(nodeID int64) error
|
|
|
|
Resume(nodeID int64) error
|
2022-09-15 18:48:32 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
type NodeManager struct {
|
|
|
|
mu sync.RWMutex
|
|
|
|
nodes map[int64]*NodeInfo
|
|
|
|
}
|
|
|
|
|
|
|
|
func (m *NodeManager) Add(node *NodeInfo) {
|
|
|
|
m.mu.Lock()
|
|
|
|
defer m.mu.Unlock()
|
|
|
|
m.nodes[node.ID()] = node
|
2022-11-10 15:01:04 +08:00
|
|
|
metrics.QueryCoordNumQueryNodes.WithLabelValues().Set(float64(len(m.nodes)))
|
2022-09-15 18:48:32 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
func (m *NodeManager) Remove(nodeID int64) {
|
|
|
|
m.mu.Lock()
|
|
|
|
defer m.mu.Unlock()
|
|
|
|
delete(m.nodes, nodeID)
|
2022-11-10 15:01:04 +08:00
|
|
|
metrics.QueryCoordNumQueryNodes.WithLabelValues().Set(float64(len(m.nodes)))
|
2022-09-15 18:48:32 +08:00
|
|
|
}
|
|
|
|
|
2022-12-06 22:59:19 +08:00
|
|
|
func (m *NodeManager) Stopping(nodeID int64) {
|
|
|
|
m.mu.Lock()
|
|
|
|
defer m.mu.Unlock()
|
|
|
|
if nodeInfo, ok := m.nodes[nodeID]; ok {
|
|
|
|
nodeInfo.SetState(NodeStateStopping)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2024-03-27 16:15:19 +08:00
|
|
|
func (m *NodeManager) Suspend(nodeID int64) error {
|
|
|
|
m.mu.Lock()
|
|
|
|
defer m.mu.Unlock()
|
|
|
|
nodeInfo, ok := m.nodes[nodeID]
|
|
|
|
if !ok {
|
|
|
|
return merr.WrapErrNodeNotFound(nodeID)
|
|
|
|
}
|
|
|
|
switch nodeInfo.GetState() {
|
|
|
|
case NodeStateNormal:
|
|
|
|
nodeInfo.SetState(NodeStateSuspend)
|
|
|
|
return nil
|
|
|
|
default:
|
|
|
|
log.Warn("failed to suspend query node", zap.Int64("nodeID", nodeID), zap.String("state", nodeInfo.GetState().String()))
|
|
|
|
return merr.WrapErrNodeStateUnexpected(nodeID, nodeInfo.GetState().String(), "failed to suspend a query node")
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func (m *NodeManager) Resume(nodeID int64) error {
|
|
|
|
m.mu.Lock()
|
|
|
|
defer m.mu.Unlock()
|
|
|
|
nodeInfo, ok := m.nodes[nodeID]
|
|
|
|
if !ok {
|
|
|
|
return merr.WrapErrNodeNotFound(nodeID)
|
|
|
|
}
|
|
|
|
|
|
|
|
switch nodeInfo.GetState() {
|
|
|
|
case NodeStateSuspend:
|
|
|
|
nodeInfo.SetState(NodeStateNormal)
|
|
|
|
return nil
|
|
|
|
|
|
|
|
default:
|
|
|
|
log.Warn("failed to resume query node", zap.Int64("nodeID", nodeID), zap.String("state", nodeInfo.GetState().String()))
|
|
|
|
return merr.WrapErrNodeStateUnexpected(nodeID, nodeInfo.GetState().String(), "failed to resume query node")
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-12-28 10:17:30 +08:00
|
|
|
func (m *NodeManager) IsStoppingNode(nodeID int64) (bool, error) {
|
|
|
|
m.mu.RLock()
|
|
|
|
defer m.mu.RUnlock()
|
|
|
|
|
|
|
|
node := m.nodes[nodeID]
|
|
|
|
if node == nil {
|
|
|
|
return false, fmt.Errorf("nodeID[%d] isn't existed", nodeID)
|
|
|
|
}
|
|
|
|
return node.IsStoppingState(), nil
|
|
|
|
}
|
|
|
|
|
2022-09-15 18:48:32 +08:00
|
|
|
func (m *NodeManager) Get(nodeID int64) *NodeInfo {
|
|
|
|
m.mu.RLock()
|
|
|
|
defer m.mu.RUnlock()
|
|
|
|
return m.nodes[nodeID]
|
|
|
|
}
|
|
|
|
|
|
|
|
func (m *NodeManager) GetAll() []*NodeInfo {
|
|
|
|
m.mu.RLock()
|
|
|
|
defer m.mu.RUnlock()
|
|
|
|
ret := make([]*NodeInfo, 0, len(m.nodes))
|
|
|
|
for _, n := range m.nodes {
|
|
|
|
ret = append(ret, n)
|
|
|
|
}
|
|
|
|
return ret
|
|
|
|
}
|
|
|
|
|
|
|
|
func NewNodeManager() *NodeManager {
|
|
|
|
return &NodeManager{
|
|
|
|
nodes: make(map[int64]*NodeInfo),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-12-06 22:59:19 +08:00
|
|
|
// State is the integer type of the NodeState* constants that describe the
// state a node is in.
type State int
|
|
|
|
|
|
|
|
// Display names for the node states; stateNameMap associates each one with
// its State value.
const (
	// NormalStateName is the display name of NodeStateNormal.
	NormalStateName = "Normal"
	// StoppingStateName is the display name of NodeStateStopping.
	StoppingStateName = "Stopping"
	// SuspendStateName is the display name of NodeStateSuspend.
	SuspendStateName = "Suspend"
)
|
|
|
|
|
2024-03-15 10:45:06 +08:00
|
|
|
type ImmutableNodeInfo struct {
|
|
|
|
NodeID int64
|
|
|
|
Address string
|
|
|
|
Hostname string
|
|
|
|
Version semver.Version
|
|
|
|
}
|
|
|
|
|
2024-03-27 16:15:19 +08:00
|
|
|
const (
|
|
|
|
NodeStateNormal State = iota
|
|
|
|
NodeStateStopping
|
|
|
|
NodeStateSuspend
|
|
|
|
)
|
|
|
|
|
|
|
|
var stateNameMap = map[State]string{
|
|
|
|
NodeStateNormal: NormalStateName,
|
|
|
|
NodeStateStopping: StoppingStateName,
|
|
|
|
NodeStateSuspend: SuspendStateName,
|
|
|
|
}
|
|
|
|
|
|
|
|
func (s State) String() string {
|
|
|
|
return stateNameMap[s]
|
|
|
|
}
|
|
|
|
|
2022-09-15 18:48:32 +08:00
|
|
|
type NodeInfo struct {
|
|
|
|
stats
|
2022-12-05 15:09:20 +08:00
|
|
|
mu sync.RWMutex
|
2024-03-15 10:45:06 +08:00
|
|
|
immutableInfo ImmutableNodeInfo
|
2022-12-06 22:59:19 +08:00
|
|
|
state State
|
2022-12-05 15:09:20 +08:00
|
|
|
lastHeartbeat *atomic.Int64
|
2022-09-15 18:48:32 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
func (n *NodeInfo) ID() int64 {
|
2024-03-15 10:45:06 +08:00
|
|
|
return n.immutableInfo.NodeID
|
2022-09-15 18:48:32 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
func (n *NodeInfo) Addr() string {
|
2024-03-15 10:45:06 +08:00
|
|
|
return n.immutableInfo.Address
|
|
|
|
}
|
|
|
|
|
|
|
|
func (n *NodeInfo) Hostname() string {
|
|
|
|
return n.immutableInfo.Hostname
|
2022-09-15 18:48:32 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
func (n *NodeInfo) SegmentCnt() int {
|
|
|
|
n.mu.RLock()
|
|
|
|
defer n.mu.RUnlock()
|
|
|
|
return n.stats.getSegmentCnt()
|
|
|
|
}
|
|
|
|
|
|
|
|
func (n *NodeInfo) ChannelCnt() int {
|
|
|
|
n.mu.RLock()
|
|
|
|
defer n.mu.RUnlock()
|
|
|
|
return n.stats.getChannelCnt()
|
|
|
|
}
|
|
|
|
|
2022-12-05 15:09:20 +08:00
|
|
|
func (n *NodeInfo) SetLastHeartbeat(time time.Time) {
|
|
|
|
n.lastHeartbeat.Store(time.UnixNano())
|
|
|
|
}
|
|
|
|
|
|
|
|
func (n *NodeInfo) LastHeartbeat() time.Time {
|
|
|
|
return time.Unix(0, n.lastHeartbeat.Load())
|
|
|
|
}
|
|
|
|
|
2022-12-06 22:59:19 +08:00
|
|
|
func (n *NodeInfo) IsStoppingState() bool {
|
|
|
|
n.mu.RLock()
|
|
|
|
defer n.mu.RUnlock()
|
|
|
|
return n.state == NodeStateStopping
|
|
|
|
}
|
|
|
|
|
|
|
|
func (n *NodeInfo) SetState(s State) {
|
|
|
|
n.mu.Lock()
|
|
|
|
defer n.mu.Unlock()
|
|
|
|
n.state = s
|
|
|
|
}
|
|
|
|
|
2024-03-27 16:15:19 +08:00
|
|
|
func (n *NodeInfo) GetState() State {
|
|
|
|
n.mu.RLock()
|
|
|
|
defer n.mu.RUnlock()
|
|
|
|
return n.state
|
|
|
|
}
|
|
|
|
|
2022-09-15 18:48:32 +08:00
|
|
|
func (n *NodeInfo) UpdateStats(opts ...StatsOption) {
|
|
|
|
n.mu.Lock()
|
|
|
|
for _, opt := range opts {
|
|
|
|
opt(n)
|
|
|
|
}
|
|
|
|
n.mu.Unlock()
|
|
|
|
}
|
|
|
|
|
2024-03-01 16:07:37 +08:00
|
|
|
func (n *NodeInfo) Version() semver.Version {
|
2024-03-15 10:45:06 +08:00
|
|
|
return n.immutableInfo.Version
|
2024-03-01 16:07:37 +08:00
|
|
|
}
|
|
|
|
|
2024-03-15 10:45:06 +08:00
|
|
|
func NewNodeInfo(info ImmutableNodeInfo) *NodeInfo {
|
2022-09-15 18:48:32 +08:00
|
|
|
return &NodeInfo{
|
2022-12-05 15:09:20 +08:00
|
|
|
stats: newStats(),
|
2024-03-15 10:45:06 +08:00
|
|
|
immutableInfo: info,
|
2022-12-05 15:09:20 +08:00
|
|
|
lastHeartbeat: atomic.NewInt64(0),
|
2022-09-15 18:48:32 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// StatsOption is a mutation applied to a NodeInfo by NodeInfo.UpdateStats,
// which invokes it while holding the node's write lock.
type StatsOption func(*NodeInfo)
|
|
|
|
|
|
|
|
func WithSegmentCnt(cnt int) StatsOption {
|
|
|
|
return func(n *NodeInfo) {
|
|
|
|
n.setSegmentCnt(cnt)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func WithChannelCnt(cnt int) StatsOption {
|
|
|
|
return func(n *NodeInfo) {
|
|
|
|
n.setChannelCnt(cnt)
|
|
|
|
}
|
|
|
|
}
|