// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
|
2021-06-15 12:41:40 +08:00
|
|
|
|
2021-06-22 16:44:09 +08:00
|
|
|
package querycoord
|
2021-06-15 12:41:40 +08:00
|
|
|
|
|
|
|
import (
|
|
|
|
"context"
|
2021-06-19 11:45:09 +08:00
|
|
|
"encoding/json"
|
2021-06-15 12:41:40 +08:00
|
|
|
"errors"
|
|
|
|
"fmt"
|
2022-04-20 16:15:41 +08:00
|
|
|
"math"
|
2021-06-19 11:45:09 +08:00
|
|
|
"path/filepath"
|
2022-04-20 16:15:41 +08:00
|
|
|
"sort"
|
2021-06-19 11:45:09 +08:00
|
|
|
"strconv"
|
2021-06-15 12:41:40 +08:00
|
|
|
"sync"
|
|
|
|
|
|
|
|
"go.uber.org/zap"
|
|
|
|
|
2021-06-19 11:45:09 +08:00
|
|
|
etcdkv "github.com/milvus-io/milvus/internal/kv/etcd"
|
2021-06-15 12:41:40 +08:00
|
|
|
"github.com/milvus-io/milvus/internal/log"
|
2022-02-28 16:51:55 +08:00
|
|
|
"github.com/milvus-io/milvus/internal/metrics"
|
2021-11-12 18:49:10 +08:00
|
|
|
"github.com/milvus-io/milvus/internal/proto/commonpb"
|
2021-06-15 12:41:40 +08:00
|
|
|
"github.com/milvus-io/milvus/internal/proto/internalpb"
|
2021-08-26 14:17:54 +08:00
|
|
|
"github.com/milvus-io/milvus/internal/proto/milvuspb"
|
2021-06-15 12:41:40 +08:00
|
|
|
"github.com/milvus-io/milvus/internal/proto/querypb"
|
2021-06-19 11:45:09 +08:00
|
|
|
"github.com/milvus-io/milvus/internal/util/sessionutil"
|
2021-09-15 20:40:07 +08:00
|
|
|
"github.com/milvus-io/milvus/internal/util/typeutil"
|
2021-06-19 11:45:09 +08:00
|
|
|
)
|
|
|
|
|
|
|
|
const (
	// queryNodeInfoPrefix is the etcd key prefix under which every QueryNode's
	// session is persisted (one key per node ID, see RegisterNode/RemoveNodeInfo).
	queryNodeInfoPrefix = "queryCoord-queryNodeInfo"
)
|
|
|
|
|
2021-09-27 10:47:57 +08:00
|
|
|
// Cluster manages all query node connections and grpc requests
type Cluster interface {
	// Collection/Partition
	ReleaseCollection(ctx context.Context, nodeID int64, in *querypb.ReleaseCollectionRequest) error
	ReleasePartitions(ctx context.Context, nodeID int64, in *querypb.ReleasePartitionsRequest) error

	// Segment
	LoadSegments(ctx context.Context, nodeID int64, in *querypb.LoadSegmentsRequest) error
	ReleaseSegments(ctx context.Context, nodeID int64, in *querypb.ReleaseSegmentsRequest) error
	GetSegmentInfo(ctx context.Context, in *querypb.GetSegmentInfoRequest) ([]*querypb.SegmentInfo, error)
	GetSegmentInfoByNode(ctx context.Context, nodeID int64, in *querypb.GetSegmentInfoRequest) ([]*querypb.SegmentInfo, error)
	GetSegmentInfoByID(ctx context.Context, segmentID UniqueID) (*querypb.SegmentInfo, error)
	SyncReplicaSegments(ctx context.Context, leaderID UniqueID, in *querypb.SyncReplicaSegmentsRequest) error

	// Channel
	WatchDmChannels(ctx context.Context, nodeID int64, in *querypb.WatchDmChannelsRequest) error
	WatchDeltaChannels(ctx context.Context, nodeID int64, in *querypb.WatchDeltaChannelsRequest) error
	HasWatchedDeltaChannel(ctx context.Context, nodeID int64, collectionID UniqueID) bool

	// Node
	RegisterNode(ctx context.Context, session *sessionutil.Session, id UniqueID, state nodeState) error
	GetNodeInfoByID(nodeID int64) (Node, error)
	RemoveNodeInfo(nodeID int64) error
	StopNode(nodeID int64)
	OnlineNodeIDs() []int64
	IsOnline(nodeID int64) (bool, error)
	OfflineNodeIDs() []int64
	HasNode(nodeID int64) bool
	GetMetrics(ctx context.Context, in *milvuspb.GetMetricsRequest) []queryNodeGetMetricsResponse

	// Allocation of segments/channels onto nodes
	AllocateSegmentsToQueryNode(ctx context.Context, reqs []*querypb.LoadSegmentsRequest, wait bool, excludeNodeIDs []int64, includeNodeIDs []int64, replicaID int64) error
	AllocateChannelsToQueryNode(ctx context.Context, reqs []*querypb.WatchDmChannelsRequest, wait bool, excludeNodeIDs []int64, includeNodeIDs []int64, replicaID int64) error
	AssignNodesToReplicas(ctx context.Context, replicas []*milvuspb.ReplicaInfo, collectionSize uint64) error

	GetSessionVersion() int64

	// Inner
	reloadFromKV() error
	getComponentInfos(ctx context.Context) []*internalpb.ComponentInfo
}
|
|
|
|
|
2021-08-26 14:17:54 +08:00
|
|
|
// newQueryNodeFn is a factory that builds a Node client for the QueryNode at
// the given address; injected so tests can substitute fake nodes.
type newQueryNodeFn func(ctx context.Context, address string, id UniqueID, kv *etcdkv.EtcdKV) (Node, error)
|
|
|
|
|
2021-09-15 20:40:07 +08:00
|
|
|
// nodeState describes the lifecycle state of a QueryNode as tracked by the cluster.
type nodeState int

const (
	// disConnect: the node is registered but no connection has been established yet.
	disConnect nodeState = 0
	// online: the node is connected and serving requests.
	online nodeState = 1
	// offline: the node is down; its watched channels get unsubscribed (see setNodeState).
	offline nodeState = 2
)
|
|
|
|
|
2021-06-15 12:41:40 +08:00
|
|
|
// queryNodeCluster is the default Cluster implementation. It tracks every known
// QueryNode, persists node sessions to etcd, and forwards grpc requests to
// individual nodes. The embedded RWMutex guards the nodes map.
type queryNodeCluster struct {
	ctx    context.Context
	cancel context.CancelFunc
	client *etcdkv.EtcdKV // etcd kv used to persist node sessions

	session        *sessionutil.Session // session service used to discover live QueryNodes
	sessionVersion int64                // revision observed at the last GetSessions call

	sync.RWMutex
	clusterMeta Meta                       // cluster-wide metadata (segments, channels, replicas)
	handler     *channelUnsubscribeHandler // unsubscribes channels of offline nodes
	nodes       map[int64]Node             // nodeID -> node client; guarded by the RWMutex
	newNodeFn   newQueryNodeFn             // factory for node clients
	segmentAllocator SegmentAllocatePolicy
	channelAllocator ChannelAllocatePolicy
}
|
|
|
|
|
2022-01-17 17:37:37 +08:00
|
|
|
func newQueryNodeCluster(ctx context.Context, clusterMeta Meta, kv *etcdkv.EtcdKV, newNodeFn newQueryNodeFn, session *sessionutil.Session, handler *channelUnsubscribeHandler) (Cluster, error) {
|
2021-09-15 20:40:07 +08:00
|
|
|
childCtx, cancel := context.WithCancel(ctx)
|
2021-08-02 22:39:25 +08:00
|
|
|
nodes := make(map[int64]Node)
|
2021-06-19 11:45:09 +08:00
|
|
|
c := &queryNodeCluster{
|
2021-11-11 12:56:42 +08:00
|
|
|
ctx: childCtx,
|
|
|
|
cancel: cancel,
|
|
|
|
client: kv,
|
|
|
|
session: session,
|
|
|
|
clusterMeta: clusterMeta,
|
2022-01-17 17:37:37 +08:00
|
|
|
handler: handler,
|
2021-11-11 12:56:42 +08:00
|
|
|
nodes: nodes,
|
|
|
|
newNodeFn: newNodeFn,
|
|
|
|
segmentAllocator: defaultSegAllocatePolicy(),
|
|
|
|
channelAllocator: defaultChannelAllocatePolicy(),
|
2021-06-15 12:41:40 +08:00
|
|
|
}
|
2021-06-19 11:45:09 +08:00
|
|
|
err := c.reloadFromKV()
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
return c, nil
|
|
|
|
}
|
|
|
|
|
2021-09-23 21:03:55 +08:00
|
|
|
// Reload trigger task, trigger task states, internal task, internal task state from etcd
|
|
|
|
// Assign the internal task to the corresponding trigger task as a child task
|
2021-06-19 11:45:09 +08:00
|
|
|
func (c *queryNodeCluster) reloadFromKV() error {
|
2021-09-15 20:40:07 +08:00
|
|
|
// get current online session
|
|
|
|
onlineNodeSessions, version, _ := c.session.GetSessions(typeutil.QueryNodeRole)
|
|
|
|
onlineSessionMap := make(map[int64]*sessionutil.Session)
|
|
|
|
for _, session := range onlineNodeSessions {
|
|
|
|
nodeID := session.ServerID
|
|
|
|
onlineSessionMap[nodeID] = session
|
2021-06-19 11:45:09 +08:00
|
|
|
}
|
2021-09-15 20:40:07 +08:00
|
|
|
for nodeID, session := range onlineSessionMap {
|
2022-04-26 11:29:54 +08:00
|
|
|
log.Info("reloadFromKV: register a queryNode to cluster", zap.Any("nodeID", nodeID))
|
2022-06-02 13:16:05 +08:00
|
|
|
err := c.RegisterNode(c.ctx, session, nodeID, disConnect)
|
2021-06-19 11:45:09 +08:00
|
|
|
if err != nil {
|
2022-04-26 11:29:54 +08:00
|
|
|
log.Warn("QueryNode failed to register", zap.Int64("nodeID", nodeID), zap.String("error info", err.Error()))
|
2021-06-19 11:45:09 +08:00
|
|
|
return err
|
|
|
|
}
|
2021-09-15 20:40:07 +08:00
|
|
|
}
|
|
|
|
c.sessionVersion = version
|
2021-06-26 22:28:10 +08:00
|
|
|
|
2021-09-15 20:40:07 +08:00
|
|
|
// load node information before power off from etcd
|
|
|
|
oldStringNodeIDs, oldNodeSessions, err := c.client.LoadWithPrefix(queryNodeInfoPrefix)
|
|
|
|
if err != nil {
|
2022-04-26 11:29:54 +08:00
|
|
|
log.Warn("reloadFromKV: get previous node info from etcd error", zap.Error(err))
|
2021-09-15 20:40:07 +08:00
|
|
|
return err
|
|
|
|
}
|
|
|
|
for index := range oldStringNodeIDs {
|
|
|
|
nodeID, err := strconv.ParseInt(filepath.Base(oldStringNodeIDs[index]), 10, 64)
|
2021-06-19 11:45:09 +08:00
|
|
|
if err != nil {
|
2022-04-26 11:29:54 +08:00
|
|
|
log.Warn("watchNodeLoop: parse nodeID error", zap.Error(err))
|
2021-06-19 11:45:09 +08:00
|
|
|
return err
|
|
|
|
}
|
2021-09-15 20:40:07 +08:00
|
|
|
if _, ok := onlineSessionMap[nodeID]; !ok {
|
|
|
|
session := &sessionutil.Session{}
|
|
|
|
err = json.Unmarshal([]byte(oldNodeSessions[index]), session)
|
|
|
|
if err != nil {
|
2022-04-26 11:29:54 +08:00
|
|
|
log.Warn("watchNodeLoop: unmarshal session error", zap.Error(err))
|
2021-09-15 20:40:07 +08:00
|
|
|
return err
|
|
|
|
}
|
2022-06-02 13:16:05 +08:00
|
|
|
err = c.RegisterNode(context.Background(), session, nodeID, offline)
|
2021-09-15 20:40:07 +08:00
|
|
|
if err != nil {
|
2022-04-26 11:29:54 +08:00
|
|
|
log.Warn("reloadFromKV: failed to add queryNode to cluster", zap.Int64("nodeID", nodeID), zap.String("error info", err.Error()))
|
2021-09-15 20:40:07 +08:00
|
|
|
return err
|
|
|
|
}
|
2021-06-19 11:45:09 +08:00
|
|
|
}
|
|
|
|
}
|
2021-09-15 20:40:07 +08:00
|
|
|
|
2021-06-19 11:45:09 +08:00
|
|
|
return nil
|
2021-06-15 12:41:40 +08:00
|
|
|
}
|
|
|
|
|
2022-06-02 13:16:05 +08:00
|
|
|
// GetSessionVersion returns the session revision observed when the online
// QueryNode sessions were last loaded in reloadFromKV.
func (c *queryNodeCluster) GetSessionVersion() int64 {
	return c.sessionVersion
}
|
|
|
|
|
2021-12-21 11:57:39 +08:00
|
|
|
func (c *queryNodeCluster) getComponentInfos(ctx context.Context) []*internalpb.ComponentInfo {
|
2021-06-15 12:41:40 +08:00
|
|
|
c.RLock()
|
|
|
|
defer c.RUnlock()
|
2021-12-21 11:57:39 +08:00
|
|
|
var subComponentInfos []*internalpb.ComponentInfo
|
|
|
|
for _, node := range c.nodes {
|
2021-08-02 22:39:25 +08:00
|
|
|
componentState := node.getComponentInfo(ctx)
|
|
|
|
subComponentInfos = append(subComponentInfos, componentState)
|
2021-06-15 12:41:40 +08:00
|
|
|
}
|
|
|
|
|
2021-12-21 11:57:39 +08:00
|
|
|
return subComponentInfos
|
2021-06-15 12:41:40 +08:00
|
|
|
}
|
|
|
|
|
2022-06-02 13:16:05 +08:00
|
|
|
func (c *queryNodeCluster) LoadSegments(ctx context.Context, nodeID int64, in *querypb.LoadSegmentsRequest) error {
|
2021-11-17 23:37:11 +08:00
|
|
|
c.RLock()
|
|
|
|
var targetNode Node
|
2021-06-15 12:41:40 +08:00
|
|
|
if node, ok := c.nodes[nodeID]; ok {
|
2021-11-17 23:37:11 +08:00
|
|
|
targetNode = node
|
|
|
|
}
|
|
|
|
c.RUnlock()
|
|
|
|
|
|
|
|
if targetNode != nil {
|
|
|
|
err := targetNode.loadSegments(ctx, in)
|
2021-08-02 22:39:25 +08:00
|
|
|
if err != nil {
|
2022-04-26 11:29:54 +08:00
|
|
|
log.Warn("loadSegments: queryNode load segments error", zap.Int64("nodeID", nodeID), zap.String("error info", err.Error()))
|
2021-08-02 22:39:25 +08:00
|
|
|
return err
|
2021-06-15 12:41:40 +08:00
|
|
|
}
|
2021-10-22 19:07:15 +08:00
|
|
|
|
2021-08-02 22:39:25 +08:00
|
|
|
return nil
|
2021-06-15 12:41:40 +08:00
|
|
|
}
|
2021-12-17 22:32:42 +08:00
|
|
|
return fmt.Errorf("loadSegments: can't find QueryNode by nodeID, nodeID = %d", nodeID)
|
2021-06-15 12:41:40 +08:00
|
|
|
}
|
|
|
|
|
2022-06-02 13:16:05 +08:00
|
|
|
func (c *queryNodeCluster) ReleaseSegments(ctx context.Context, leaderID int64, in *querypb.ReleaseSegmentsRequest) error {
|
2021-11-17 23:37:11 +08:00
|
|
|
c.RLock()
|
|
|
|
var targetNode Node
|
2022-05-25 15:17:59 +08:00
|
|
|
if node, ok := c.nodes[leaderID]; ok {
|
2021-11-17 23:37:11 +08:00
|
|
|
targetNode = node
|
|
|
|
}
|
|
|
|
c.RUnlock()
|
|
|
|
|
|
|
|
if targetNode != nil {
|
|
|
|
if !targetNode.isOnline() {
|
2021-08-02 22:39:25 +08:00
|
|
|
return errors.New("node offline")
|
2021-06-19 11:45:09 +08:00
|
|
|
}
|
2021-08-02 22:39:25 +08:00
|
|
|
|
2021-11-17 23:37:11 +08:00
|
|
|
err := targetNode.releaseSegments(ctx, in)
|
2021-08-02 22:39:25 +08:00
|
|
|
if err != nil {
|
2022-05-25 15:17:59 +08:00
|
|
|
log.Warn("releaseSegments: queryNode release segments error", zap.Int64("leaderID", leaderID), zap.Int64("nodeID", in.NodeID), zap.String("error info", err.Error()))
|
2021-08-02 22:39:25 +08:00
|
|
|
return err
|
2021-06-19 11:45:09 +08:00
|
|
|
}
|
2021-08-02 22:39:25 +08:00
|
|
|
|
|
|
|
return nil
|
2021-06-15 12:41:40 +08:00
|
|
|
}
|
|
|
|
|
2022-05-25 15:17:59 +08:00
|
|
|
return fmt.Errorf("releaseSegments: can't find QueryNode by nodeID, nodeID = %d", leaderID)
|
2021-06-15 12:41:40 +08:00
|
|
|
}
|
|
|
|
|
2022-06-02 13:16:05 +08:00
|
|
|
func (c *queryNodeCluster) WatchDmChannels(ctx context.Context, nodeID int64, in *querypb.WatchDmChannelsRequest) error {
|
2021-11-17 23:37:11 +08:00
|
|
|
c.RLock()
|
|
|
|
var targetNode Node
|
2021-06-15 12:41:40 +08:00
|
|
|
if node, ok := c.nodes[nodeID]; ok {
|
2021-11-17 23:37:11 +08:00
|
|
|
targetNode = node
|
|
|
|
}
|
|
|
|
c.RUnlock()
|
|
|
|
|
|
|
|
if targetNode != nil {
|
|
|
|
err := targetNode.watchDmChannels(ctx, in)
|
2021-08-02 22:39:25 +08:00
|
|
|
if err != nil {
|
|
|
|
return err
|
2021-06-19 11:45:09 +08:00
|
|
|
}
|
2021-12-21 11:57:39 +08:00
|
|
|
dmChannelWatchInfo := make([]*querypb.DmChannelWatchInfo, len(in.Infos))
|
|
|
|
for index, info := range in.Infos {
|
|
|
|
dmChannelWatchInfo[index] = &querypb.DmChannelWatchInfo{
|
|
|
|
CollectionID: info.CollectionID,
|
|
|
|
DmChannel: info.ChannelName,
|
|
|
|
NodeIDLoaded: nodeID,
|
2022-04-20 16:15:41 +08:00
|
|
|
ReplicaID: in.ReplicaID,
|
2022-04-29 14:11:47 +08:00
|
|
|
NodeIds: []int64{nodeID},
|
2021-12-21 11:57:39 +08:00
|
|
|
}
|
2021-06-15 12:41:40 +08:00
|
|
|
}
|
2021-06-19 11:45:09 +08:00
|
|
|
|
2021-12-21 11:57:39 +08:00
|
|
|
err = c.clusterMeta.setDmChannelInfos(dmChannelWatchInfo)
|
2021-08-02 22:39:25 +08:00
|
|
|
if err != nil {
|
2022-04-26 11:29:54 +08:00
|
|
|
// TODO DML channel maybe leaked, need to release dml if no related segment
|
2021-08-02 22:39:25 +08:00
|
|
|
return err
|
2021-06-15 12:41:40 +08:00
|
|
|
}
|
2021-08-02 22:39:25 +08:00
|
|
|
|
|
|
|
return nil
|
2021-06-15 12:41:40 +08:00
|
|
|
}
|
2021-12-24 19:39:03 +08:00
|
|
|
return fmt.Errorf("watchDmChannels: can't find QueryNode by nodeID, nodeID = %d", nodeID)
|
2021-06-15 12:41:40 +08:00
|
|
|
}
|
|
|
|
|
2022-06-02 13:16:05 +08:00
|
|
|
func (c *queryNodeCluster) WatchDeltaChannels(ctx context.Context, nodeID int64, in *querypb.WatchDeltaChannelsRequest) error {
|
2021-11-17 23:37:11 +08:00
|
|
|
c.RLock()
|
|
|
|
var targetNode Node
|
2021-11-05 14:47:19 +08:00
|
|
|
if node, ok := c.nodes[nodeID]; ok {
|
2021-11-17 23:37:11 +08:00
|
|
|
targetNode = node
|
|
|
|
}
|
|
|
|
c.RUnlock()
|
|
|
|
|
|
|
|
if targetNode != nil {
|
|
|
|
err := targetNode.watchDeltaChannels(ctx, in)
|
2021-11-05 14:47:19 +08:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2021-11-13 08:49:08 +08:00
|
|
|
|
2021-11-05 14:47:19 +08:00
|
|
|
return nil
|
|
|
|
}
|
2021-11-17 23:37:11 +08:00
|
|
|
|
2021-12-21 21:27:09 +08:00
|
|
|
return fmt.Errorf("watchDeltaChannels: can't find QueryNode by nodeID, nodeID = %d", nodeID)
|
2021-11-05 14:47:19 +08:00
|
|
|
}
|
|
|
|
|
2022-06-02 13:16:05 +08:00
|
|
|
func (c *queryNodeCluster) HasWatchedDeltaChannel(ctx context.Context, nodeID int64, collectionID UniqueID) bool {
|
2021-11-17 23:37:11 +08:00
|
|
|
c.RLock()
|
|
|
|
defer c.RUnlock()
|
2021-11-13 08:49:08 +08:00
|
|
|
|
|
|
|
return c.nodes[nodeID].hasWatchedDeltaChannel(collectionID)
|
|
|
|
}
|
|
|
|
|
2022-06-02 13:16:05 +08:00
|
|
|
func (c *queryNodeCluster) ReleaseCollection(ctx context.Context, nodeID int64, in *querypb.ReleaseCollectionRequest) error {
|
2021-11-17 23:37:11 +08:00
|
|
|
c.RLock()
|
|
|
|
var targetNode Node
|
2021-06-15 12:41:40 +08:00
|
|
|
if node, ok := c.nodes[nodeID]; ok {
|
2021-11-17 23:37:11 +08:00
|
|
|
targetNode = node
|
|
|
|
}
|
|
|
|
c.RUnlock()
|
|
|
|
|
|
|
|
if targetNode != nil {
|
|
|
|
err := targetNode.releaseCollection(ctx, in)
|
2021-08-02 22:39:25 +08:00
|
|
|
if err != nil {
|
|
|
|
return err
|
2021-07-13 14:16:00 +08:00
|
|
|
}
|
2021-12-21 11:57:39 +08:00
|
|
|
|
2021-08-02 22:39:25 +08:00
|
|
|
return nil
|
2021-06-15 12:41:40 +08:00
|
|
|
}
|
|
|
|
|
2021-12-23 21:56:22 +08:00
|
|
|
return fmt.Errorf("releaseCollection: can't find QueryNode by nodeID, nodeID = %d", nodeID)
|
2021-06-15 12:41:40 +08:00
|
|
|
}
|
|
|
|
|
2022-06-02 13:16:05 +08:00
|
|
|
func (c *queryNodeCluster) ReleasePartitions(ctx context.Context, nodeID int64, in *querypb.ReleasePartitionsRequest) error {
|
2021-11-17 23:37:11 +08:00
|
|
|
c.RLock()
|
|
|
|
var targetNode Node
|
2021-06-15 12:41:40 +08:00
|
|
|
if node, ok := c.nodes[nodeID]; ok {
|
2021-11-17 23:37:11 +08:00
|
|
|
targetNode = node
|
|
|
|
}
|
|
|
|
c.RUnlock()
|
|
|
|
|
|
|
|
if targetNode != nil {
|
|
|
|
err := targetNode.releasePartitions(ctx, in)
|
2021-08-02 22:39:25 +08:00
|
|
|
if err != nil {
|
|
|
|
return err
|
2021-07-13 14:16:00 +08:00
|
|
|
}
|
2021-10-11 09:54:37 +08:00
|
|
|
|
2021-08-02 22:39:25 +08:00
|
|
|
return nil
|
2021-06-15 12:41:40 +08:00
|
|
|
}
|
|
|
|
|
2021-12-28 21:49:07 +08:00
|
|
|
return fmt.Errorf("releasePartitions: can't find QueryNode by nodeID, nodeID = %d", nodeID)
|
2021-06-15 12:41:40 +08:00
|
|
|
}
|
|
|
|
|
2022-06-02 13:16:05 +08:00
|
|
|
func (c *queryNodeCluster) GetSegmentInfoByID(ctx context.Context, segmentID UniqueID) (*querypb.SegmentInfo, error) {
|
2021-12-13 21:51:10 +08:00
|
|
|
segmentInfo, err := c.clusterMeta.getSegmentInfoByID(segmentID)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
2021-11-12 18:49:10 +08:00
|
|
|
c.RLock()
|
2021-12-13 21:51:10 +08:00
|
|
|
targetNode, ok := c.nodes[segmentInfo.NodeID]
|
|
|
|
c.RUnlock()
|
|
|
|
if !ok {
|
|
|
|
return nil, fmt.Errorf("updateSegmentInfo: can't find query node by nodeID, nodeID = %d", segmentInfo.NodeID)
|
|
|
|
}
|
2021-11-12 18:49:10 +08:00
|
|
|
|
2021-12-13 21:51:10 +08:00
|
|
|
res, err := targetNode.getSegmentInfo(ctx, &querypb.GetSegmentInfoRequest{
|
|
|
|
Base: &commonpb.MsgBase{
|
|
|
|
MsgType: commonpb.MsgType_SegmentInfo,
|
|
|
|
},
|
|
|
|
CollectionID: segmentInfo.CollectionID,
|
|
|
|
})
|
2021-11-12 18:49:10 +08:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
2021-12-13 21:51:10 +08:00
|
|
|
|
|
|
|
// protobuf convention, it's ok to call GetXXX on nil
|
|
|
|
for _, info := range res.GetInfos() {
|
|
|
|
if info.GetSegmentID() == segmentID {
|
|
|
|
return info, nil
|
2021-11-12 18:49:10 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-12-23 21:58:14 +08:00
|
|
|
return nil, fmt.Errorf("updateSegmentInfo: can't find segment %d on QueryNode %d", segmentID, segmentInfo.NodeID)
|
2021-11-12 18:49:10 +08:00
|
|
|
}
|
|
|
|
|
2022-06-02 13:16:05 +08:00
|
|
|
// GetSegmentInfo gathers segment info for a request: sealed segments come
// from cluster meta, growing (non-sealed) segments are fetched from every
// node concurrently.
func (c *queryNodeCluster) GetSegmentInfo(ctx context.Context, in *querypb.GetSegmentInfoRequest) ([]*querypb.SegmentInfo, error) {
	// respTuple pairs one node's response with its error for channel transport.
	type respTuple struct {
		res *querypb.GetSegmentInfoResponse
		err error
	}

	var (
		segmentInfos []*querypb.SegmentInfo
	)

	// Fetch sealed segments from Meta
	if len(in.SegmentIDs) > 0 {
		// explicit segment list: resolve each ID; fail fast on a miss
		for _, segmentID := range in.SegmentIDs {
			segment, err := c.clusterMeta.getSegmentInfoByID(segmentID)
			if err != nil {
				return nil, err
			}

			segmentInfos = append(segmentInfos, segment)
		}
	} else {
		// no explicit IDs: take every segment of the collection
		// (CollectionID == 0 means all collections)
		allSegments := c.clusterMeta.showSegmentInfos(in.CollectionID, nil)
		for _, segment := range allSegments {
			if in.CollectionID == 0 || segment.CollectionID == in.CollectionID {
				segmentInfos = append(segmentInfos, segment)
			}
		}
	}

	// Fetch growing segments
	c.RLock()
	var wg sync.WaitGroup
	cnt := len(c.nodes)
	resChan := make(chan respTuple, cnt)
	wg.Add(cnt)
	for _, node := range c.nodes {
		go func(node Node) {
			defer wg.Done()
			res, err := node.getSegmentInfo(ctx, in)
			resChan <- respTuple{
				res: res,
				err: err,
			}
		}(node)
	}
	// the RPCs don't touch c.nodes, so release the lock before waiting
	c.RUnlock()
	wg.Wait()
	close(resChan)

	for tuple := range resChan {
		if tuple.err != nil {
			return nil, tuple.err
		}

		// keep only non-sealed (growing) segments; sealed ones already came from meta
		segments := tuple.res.GetInfos()
		for _, segment := range segments {
			if segment.SegmentState != commonpb.SegmentState_Sealed {
				segmentInfos = append(segmentInfos, segment)
			}
		}
	}

	//TODO::update meta
	return segmentInfos, nil
}
|
|
|
|
|
2022-06-02 13:16:05 +08:00
|
|
|
func (c *queryNodeCluster) GetSegmentInfoByNode(ctx context.Context, nodeID int64, in *querypb.GetSegmentInfoRequest) ([]*querypb.SegmentInfo, error) {
|
2021-11-05 16:00:55 +08:00
|
|
|
c.RLock()
|
2021-12-13 21:51:10 +08:00
|
|
|
node, ok := c.nodes[nodeID]
|
|
|
|
c.RUnlock()
|
2021-11-05 16:00:55 +08:00
|
|
|
|
2021-12-13 21:51:10 +08:00
|
|
|
if !ok {
|
|
|
|
return nil, fmt.Errorf("getSegmentInfoByNode: can't find query node by nodeID, nodeID = %d", nodeID)
|
2021-11-05 16:00:55 +08:00
|
|
|
}
|
2021-12-13 21:51:10 +08:00
|
|
|
res, err := node.getSegmentInfo(ctx, in)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
return res.GetInfos(), nil
|
2021-11-05 16:00:55 +08:00
|
|
|
}
|
|
|
|
|
2022-06-02 13:16:05 +08:00
|
|
|
func (c *queryNodeCluster) SyncReplicaSegments(ctx context.Context, leaderID UniqueID, in *querypb.SyncReplicaSegmentsRequest) error {
|
2022-04-28 16:01:48 +08:00
|
|
|
c.RLock()
|
|
|
|
leader, ok := c.nodes[leaderID]
|
|
|
|
c.RUnlock()
|
|
|
|
|
|
|
|
if !ok {
|
|
|
|
return fmt.Errorf("syncReplicaSegments: can't find leader query node, leaderID = %d", leaderID)
|
|
|
|
}
|
|
|
|
return leader.syncReplicaSegments(ctx, in)
|
|
|
|
}
|
|
|
|
|
2021-08-17 10:06:11 +08:00
|
|
|
// queryNodeGetMetricsResponse pairs one node's metrics response with the
// error (if any) from fetching it; used by GetMetrics fan-out.
type queryNodeGetMetricsResponse struct {
	resp *milvuspb.GetMetricsResponse
	err  error
}
|
|
|
|
|
2022-06-02 13:16:05 +08:00
|
|
|
// GetMetrics fans the metrics request out to every node in the cluster
// concurrently and returns one response (or error) per node.
func (c *queryNodeCluster) GetMetrics(ctx context.Context, in *milvuspb.GetMetricsRequest) []queryNodeGetMetricsResponse {
	c.RLock()
	var wg sync.WaitGroup
	cnt := len(c.nodes)
	wg.Add(cnt)
	// buffered to cnt so no goroutine blocks on send
	respChan := make(chan queryNodeGetMetricsResponse, cnt)
	for _, node := range c.nodes {
		go func(node Node) {
			defer wg.Done()
			resp, err := node.getMetrics(ctx, in)
			respChan <- queryNodeGetMetricsResponse{
				resp: resp,
				err:  err,
			}
		}(node)
	}
	// the RPCs don't touch c.nodes, so release the lock before waiting
	c.RUnlock()

	wg.Wait()
	close(respChan)

	ret := make([]queryNodeGetMetricsResponse, 0, cnt)
	for res := range respChan {
		ret = append(ret, res)
	}

	return ret
}
|
|
|
|
|
2022-01-17 17:37:37 +08:00
|
|
|
// setNodeState updates a queryNode's state, which may be offline, disConnect, or online.
// When queryCoord restarts, it calls setNodeState via the registerNode function;
// when a new queryNode starts, queryCoord calls setNodeState via the registerNode function;
// when a queryNode goes down, queryCoord calls setNodeState via the stopNode function.
func (c *queryNodeCluster) setNodeState(nodeID int64, node Node, state nodeState) {
	// if query node down, should unsubscribe all channels the node has watched;
	// otherwise pulsar may accumulate too many backlogs
	if state == offline {
		// 1. find all the search/dmChannel/deltaChannel the node has watched
		unsubscribeChannelInfo := c.clusterMeta.getWatchedChannelsByNodeID(nodeID)

		// 2. add unsubscribed channels to handler; handler will auto unsubscribe the channels
		if len(unsubscribeChannelInfo.CollectionChannels) != 0 {
			c.handler.addUnsubscribeChannelInfo(unsubscribeChannelInfo)
		}
	}

	node.setState(state)
}
|
|
|
|
|
2022-06-02 13:16:05 +08:00
|
|
|
func (c *queryNodeCluster) RegisterNode(ctx context.Context, session *sessionutil.Session, id UniqueID, state nodeState) error {
|
2021-06-19 11:45:09 +08:00
|
|
|
c.Lock()
|
|
|
|
defer c.Unlock()
|
|
|
|
|
2021-06-15 12:41:40 +08:00
|
|
|
if _, ok := c.nodes[id]; !ok {
|
2021-07-13 14:16:00 +08:00
|
|
|
sessionJSON, err := json.Marshal(session)
|
|
|
|
if err != nil {
|
2022-04-26 11:29:54 +08:00
|
|
|
log.Warn("registerNode: marshal session error", zap.Int64("nodeID", id), zap.Any("address", session))
|
2021-07-13 14:16:00 +08:00
|
|
|
return err
|
|
|
|
}
|
|
|
|
key := fmt.Sprintf("%s/%d", queryNodeInfoPrefix, id)
|
|
|
|
err = c.client.Save(key, string(sessionJSON))
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2021-09-15 20:40:07 +08:00
|
|
|
node, err := c.newNodeFn(ctx, session.Address, id, c.client)
|
2021-08-26 14:17:54 +08:00
|
|
|
if err != nil {
|
2022-04-26 11:29:54 +08:00
|
|
|
log.Warn("registerNode: create a new QueryNode failed", zap.Int64("nodeID", id), zap.Error(err))
|
2021-08-26 14:17:54 +08:00
|
|
|
return err
|
|
|
|
}
|
2022-01-17 17:37:37 +08:00
|
|
|
c.setNodeState(id, node, state)
|
2021-09-15 20:40:07 +08:00
|
|
|
if state < online {
|
|
|
|
go node.start()
|
|
|
|
}
|
|
|
|
c.nodes[id] = node
|
2022-02-28 16:51:55 +08:00
|
|
|
metrics.QueryCoordNumQueryNodes.WithLabelValues().Inc()
|
2022-04-26 11:29:54 +08:00
|
|
|
log.Info("registerNode: create a new QueryNode", zap.Int64("nodeID", id), zap.String("address", session.Address), zap.Any("state", state))
|
2021-06-15 12:41:40 +08:00
|
|
|
return nil
|
|
|
|
}
|
2021-12-28 21:53:07 +08:00
|
|
|
return fmt.Errorf("registerNode: QueryNode %d alredy exists in cluster", id)
|
2021-06-15 12:41:40 +08:00
|
|
|
}
|
2021-06-19 11:45:09 +08:00
|
|
|
|
2022-06-02 13:16:05 +08:00
|
|
|
func (c *queryNodeCluster) GetNodeInfoByID(nodeID int64) (Node, error) {
|
2021-06-30 17:48:19 +08:00
|
|
|
c.RLock()
|
2021-12-13 21:51:10 +08:00
|
|
|
node, ok := c.nodes[nodeID]
|
|
|
|
c.RUnlock()
|
|
|
|
if !ok {
|
2021-12-23 22:02:07 +08:00
|
|
|
return nil, fmt.Errorf("getNodeInfoByID: QueryNode %d not exist", nodeID)
|
2021-06-30 17:48:19 +08:00
|
|
|
}
|
|
|
|
|
2021-12-13 21:51:10 +08:00
|
|
|
nodeInfo, err := node.getNodeInfo()
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
return nodeInfo, nil
|
2021-06-30 17:48:19 +08:00
|
|
|
}
|
|
|
|
|
2022-06-02 13:16:05 +08:00
|
|
|
func (c *queryNodeCluster) RemoveNodeInfo(nodeID int64) error {
|
2021-06-22 14:10:09 +08:00
|
|
|
c.Lock()
|
|
|
|
defer c.Unlock()
|
|
|
|
|
2021-06-19 11:45:09 +08:00
|
|
|
key := fmt.Sprintf("%s/%d", queryNodeInfoPrefix, nodeID)
|
2021-06-22 14:10:09 +08:00
|
|
|
err := c.client.Remove(key)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
2021-12-21 11:57:39 +08:00
|
|
|
delete(c.nodes, nodeID)
|
2022-02-28 16:51:55 +08:00
|
|
|
metrics.QueryCoordNumQueryNodes.WithLabelValues().Dec()
|
2022-04-26 11:29:54 +08:00
|
|
|
log.Info("removeNodeInfo: delete nodeInfo in cluster MetaReplica", zap.Int64("nodeID", nodeID))
|
2021-06-22 14:10:09 +08:00
|
|
|
|
|
|
|
return nil
|
2021-06-19 11:45:09 +08:00
|
|
|
}
|
|
|
|
|
2022-06-02 13:16:05 +08:00
|
|
|
func (c *queryNodeCluster) StopNode(nodeID int64) {
|
2021-11-17 23:37:11 +08:00
|
|
|
c.RLock()
|
|
|
|
defer c.RUnlock()
|
2021-08-26 14:17:54 +08:00
|
|
|
|
2021-07-13 14:16:00 +08:00
|
|
|
if node, ok := c.nodes[nodeID]; ok {
|
|
|
|
node.stop()
|
2022-01-17 17:37:37 +08:00
|
|
|
c.setNodeState(nodeID, node, offline)
|
2022-04-26 11:29:54 +08:00
|
|
|
log.Info("stopNode: queryNode offline", zap.Int64("nodeID", nodeID))
|
2021-07-13 14:16:00 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-06-02 13:16:05 +08:00
|
|
|
func (c *queryNodeCluster) OnlineNodeIDs() []int64 {
|
2021-06-30 17:48:19 +08:00
|
|
|
c.RLock()
|
|
|
|
defer c.RUnlock()
|
2021-06-19 11:45:09 +08:00
|
|
|
|
2021-12-21 11:57:39 +08:00
|
|
|
var onlineNodeIDs []int64
|
2021-06-19 11:45:09 +08:00
|
|
|
for nodeID, node := range c.nodes {
|
2021-09-15 20:40:07 +08:00
|
|
|
if node.isOnline() {
|
2021-12-21 11:57:39 +08:00
|
|
|
onlineNodeIDs = append(onlineNodeIDs, nodeID)
|
2021-06-19 11:45:09 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-12-21 11:57:39 +08:00
|
|
|
return onlineNodeIDs
|
2021-06-30 17:48:19 +08:00
|
|
|
}
|
|
|
|
|
2022-06-02 13:16:05 +08:00
|
|
|
func (c *queryNodeCluster) OfflineNodeIDs() []int64 {
|
2021-09-15 20:40:07 +08:00
|
|
|
c.RLock()
|
|
|
|
defer c.RUnlock()
|
|
|
|
|
2021-12-21 11:57:39 +08:00
|
|
|
var offlineNodeIDs []int64
|
|
|
|
for nodeID, node := range c.nodes {
|
|
|
|
if node.isOffline() {
|
|
|
|
offlineNodeIDs = append(offlineNodeIDs, nodeID)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return offlineNodeIDs
|
2021-09-15 20:40:07 +08:00
|
|
|
}
|
|
|
|
|
2022-06-02 13:16:05 +08:00
|
|
|
func (c *queryNodeCluster) HasNode(nodeID int64) bool {
|
2021-11-05 16:00:55 +08:00
|
|
|
c.RLock()
|
|
|
|
defer c.RUnlock()
|
|
|
|
|
|
|
|
if _, ok := c.nodes[nodeID]; ok {
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
2022-06-02 13:16:05 +08:00
|
|
|
func (c *queryNodeCluster) IsOnline(nodeID int64) (bool, error) {
|
2021-11-17 23:37:11 +08:00
|
|
|
c.RLock()
|
|
|
|
defer c.RUnlock()
|
2021-06-30 17:48:19 +08:00
|
|
|
|
|
|
|
if node, ok := c.nodes[nodeID]; ok {
|
2021-09-15 20:40:07 +08:00
|
|
|
return node.isOnline(), nil
|
2021-06-30 17:48:19 +08:00
|
|
|
}
|
|
|
|
|
2021-12-24 19:37:12 +08:00
|
|
|
return false, fmt.Errorf("isOnline: QueryNode %d not exist", nodeID)
|
2021-06-19 11:45:09 +08:00
|
|
|
}
|
|
|
|
|
2021-09-15 20:40:07 +08:00
|
|
|
//func (c *queryNodeCluster) printMeta() {
|
|
|
|
// c.RLock()
|
|
|
|
// defer c.RUnlock()
|
|
|
|
//
|
|
|
|
// for id, node := range c.nodes {
|
|
|
|
// if node.isOnline() {
|
|
|
|
// collectionInfos := node.showCollections()
|
|
|
|
// for _, info := range collectionInfos {
|
|
|
|
// log.Debug("PrintMeta: query coordinator cluster info: collectionInfo", zap.Int64("nodeID", id), zap.Int64("collectionID", info.CollectionID), zap.Any("info", info))
|
|
|
|
// }
|
|
|
|
//
|
|
|
|
// }
|
|
|
|
// }
|
|
|
|
//}
|
2021-08-02 22:39:25 +08:00
|
|
|
|
2022-06-02 13:16:05 +08:00
|
|
|
// AllocateSegmentsToQueryNode delegates segment placement to the configured
// SegmentAllocatePolicy (defaultSegAllocatePolicy unless overridden).
func (c *queryNodeCluster) AllocateSegmentsToQueryNode(ctx context.Context, reqs []*querypb.LoadSegmentsRequest, wait bool, excludeNodeIDs []int64, includeNodeIDs []int64, replicaID int64) error {
	return c.segmentAllocator(ctx, reqs, c, c.clusterMeta, wait, excludeNodeIDs, includeNodeIDs, replicaID)
}
|
|
|
|
|
2022-06-02 13:16:05 +08:00
|
|
|
// AllocateChannelsToQueryNode delegates DM-channel placement to the
// configured ChannelAllocatePolicy (defaultChannelAllocatePolicy unless overridden).
func (c *queryNodeCluster) AllocateChannelsToQueryNode(ctx context.Context, reqs []*querypb.WatchDmChannelsRequest, wait bool, excludeNodeIDs []int64, includeNodeIDs []int64, replicaID int64) error {
	return c.channelAllocator(ctx, reqs, c, c.clusterMeta, wait, excludeNodeIDs, includeNodeIDs, replicaID)
}
|
2022-03-25 14:03:25 +08:00
|
|
|
|
2022-04-20 16:15:41 +08:00
|
|
|
// AssignNodesToReplicas spreads the online QueryNodes across the given
// replicas, balancing by each node's free memory (totalMem - memUsage).
// Return error if no enough nodes/resources to create replicas
func (c *queryNodeCluster) AssignNodesToReplicas(ctx context.Context, replicas []*milvuspb.ReplicaInfo, collectionSize uint64) error {
	nodeIds := c.OnlineNodeIDs()
	if len(nodeIds) < len(replicas) {
		return fmt.Errorf("no enough nodes to create replicas, node_num=%d replica_num=%d", len(nodeIds), len(replicas))
	}

	// getNodeInfos drops unreachable nodes, so re-check the count afterwards
	nodeInfos := getNodeInfos(c, nodeIds)
	if len(nodeInfos) < len(replicas) {
		return fmt.Errorf("no enough nodes to create replicas, node_num=%d replica_num=%d", len(nodeInfos), len(replicas))
	}

	// sort nodes by free memory, descending
	sort.Slice(nodeInfos, func(i, j int) bool {
		return nodeInfos[i].totalMem-nodeInfos[i].memUsage > nodeInfos[j].totalMem-nodeInfos[j].memUsage
	})

	// greedily assign each node to the replica with the least accumulated free memory
	memCapCount := make([]uint64, len(replicas))
	for _, info := range nodeInfos {
		i := 0
		minMemCap := uint64(math.MaxUint64)
		for j, memCap := range memCapCount {
			if memCap < minMemCap {
				minMemCap = memCap
				i = j
			}
		}

		replicas[i].NodeIds = append(replicas[i].NodeIds, info.id)
		memCapCount[i] += info.totalMem - info.memUsage
	}

	// every replica must end up with enough free memory to hold the collection
	for _, memCap := range memCapCount {
		if memCap < collectionSize {
			return fmt.Errorf("no enough memory to load collection/partitions, collectionSize=%v, replicasNum=%v", collectionSize, len(replicas))
		}
	}

	return nil
}
|
2022-04-20 16:15:41 +08:00
|
|
|
|
|
|
|
// It's a helper method to concurrently get nodes' info
|
|
|
|
// Remove nodes that it can't connect to
|
2022-05-13 18:31:54 +08:00
|
|
|
func getNodeInfos(cluster Cluster, nodeIds []UniqueID) []*queryNode {
|
2022-04-20 16:15:41 +08:00
|
|
|
nodeCh := make(chan *queryNode, len(nodeIds))
|
|
|
|
|
|
|
|
wg := sync.WaitGroup{}
|
|
|
|
for _, id := range nodeIds {
|
|
|
|
wg.Add(1)
|
|
|
|
go func(id UniqueID) {
|
|
|
|
defer wg.Done()
|
2022-06-02 13:16:05 +08:00
|
|
|
info, err := cluster.GetNodeInfoByID(id)
|
2022-04-20 16:15:41 +08:00
|
|
|
if err != nil {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
nodeCh <- info.(*queryNode)
|
|
|
|
}(id)
|
|
|
|
}
|
|
|
|
wg.Wait()
|
|
|
|
close(nodeCh)
|
|
|
|
|
|
|
|
nodes := make([]*queryNode, 0, len(nodeCh))
|
|
|
|
for node := range nodeCh {
|
|
|
|
nodes = append(nodes, node)
|
|
|
|
}
|
|
|
|
|
2022-05-13 18:31:54 +08:00
|
|
|
return nodes
|
2022-04-20 16:15:41 +08:00
|
|
|
}
|