2021-12-09 14:37:22 +08:00
|
|
|
// Licensed to the LF AI & Data foundation under one
|
|
|
|
// or more contributor license agreements. See the NOTICE file
|
|
|
|
// distributed with this work for additional information
|
|
|
|
// regarding copyright ownership. The ASF licenses this file
|
|
|
|
// to you under the Apache License, Version 2.0 (the
|
|
|
|
// "License"); you may not use this file except in compliance
|
2021-04-19 13:47:10 +08:00
|
|
|
// with the License. You may obtain a copy of the License at
|
|
|
|
//
|
2021-12-09 14:37:22 +08:00
|
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
2021-04-19 13:47:10 +08:00
|
|
|
//
|
2021-12-09 14:37:22 +08:00
|
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
// See the License for the specific language governing permissions and
|
|
|
|
// limitations under the License.
|
2021-04-19 13:47:10 +08:00
|
|
|
|
2021-01-16 10:12:14 +08:00
|
|
|
package querynode
|
2020-08-25 15:45:19 +08:00
|
|
|
|
2020-09-02 10:38:08 +08:00
|
|
|
/*
|
|
|
|
|
2020-10-23 18:01:24 +08:00
|
|
|
#cgo CFLAGS: -I${SRCDIR}/../core/output/include
|
2020-09-02 10:38:08 +08:00
|
|
|
|
2020-10-31 15:11:47 +08:00
|
|
|
#cgo LDFLAGS: -L${SRCDIR}/../core/output/lib -lmilvus_segcore -Wl,-rpath=${SRCDIR}/../core/output/lib
|
2020-09-02 10:38:08 +08:00
|
|
|
|
2020-11-25 10:31:51 +08:00
|
|
|
#include "segcore/collection_c.h"
|
|
|
|
#include "segcore/segment_c.h"
|
2021-04-16 14:02:49 +08:00
|
|
|
#include "segcore/segcore_init_c.h"
|
2020-09-02 10:38:08 +08:00
|
|
|
|
|
|
|
*/
|
2020-08-25 15:45:19 +08:00
|
|
|
import "C"
|
2020-09-02 10:38:08 +08:00
|
|
|
|
2020-08-25 15:45:19 +08:00
|
|
|
import (
|
2020-10-15 21:31:50 +08:00
|
|
|
"context"
|
2021-03-22 16:36:10 +08:00
|
|
|
"errors"
|
2021-10-20 17:54:43 +08:00
|
|
|
"fmt"
|
|
|
|
"path/filepath"
|
2021-06-21 18:22:13 +08:00
|
|
|
"strconv"
|
2021-09-24 21:03:56 +08:00
|
|
|
"sync"
|
2021-06-21 18:22:13 +08:00
|
|
|
"sync/atomic"
|
2021-11-22 16:23:17 +08:00
|
|
|
"syscall"
|
2021-10-12 19:56:34 +08:00
|
|
|
"time"
|
2021-09-15 12:57:48 +08:00
|
|
|
"unsafe"
|
2021-06-21 18:22:13 +08:00
|
|
|
|
2021-10-20 17:54:43 +08:00
|
|
|
"github.com/golang/protobuf/proto"
|
|
|
|
"go.etcd.io/etcd/api/v3/mvccpb"
|
2021-06-19 11:45:09 +08:00
|
|
|
"go.uber.org/zap"
|
2021-01-11 18:35:54 +08:00
|
|
|
|
2021-07-13 14:16:00 +08:00
|
|
|
"github.com/milvus-io/milvus/internal/kv"
|
|
|
|
etcdkv "github.com/milvus-io/milvus/internal/kv/etcd"
|
2021-04-22 14:45:57 +08:00
|
|
|
"github.com/milvus-io/milvus/internal/log"
|
|
|
|
"github.com/milvus-io/milvus/internal/msgstream"
|
|
|
|
"github.com/milvus-io/milvus/internal/proto/internalpb"
|
2021-10-20 17:54:43 +08:00
|
|
|
"github.com/milvus-io/milvus/internal/proto/querypb"
|
2021-04-22 14:45:57 +08:00
|
|
|
"github.com/milvus-io/milvus/internal/types"
|
2021-11-17 12:13:10 +08:00
|
|
|
"github.com/milvus-io/milvus/internal/util"
|
2021-07-13 14:16:00 +08:00
|
|
|
"github.com/milvus-io/milvus/internal/util/retry"
|
2021-05-21 19:28:52 +08:00
|
|
|
"github.com/milvus-io/milvus/internal/util/sessionutil"
|
|
|
|
"github.com/milvus-io/milvus/internal/util/typeutil"
|
2020-08-25 15:45:19 +08:00
|
|
|
)
|
|
|
|
|
2021-10-05 12:42:04 +08:00
|
|
|
// make sure QueryNode implements types.QueryNode
|
|
|
|
var _ types.QueryNode = (*QueryNode)(nil)
|
|
|
|
|
2021-10-11 17:16:30 +08:00
|
|
|
// make sure QueryNode implements types.QueryNodeComponent
|
|
|
|
var _ types.QueryNodeComponent = (*QueryNode)(nil)
|
2021-10-05 12:42:04 +08:00
|
|
|
|
2021-09-26 20:16:12 +08:00
|
|
|
// QueryNode communicates with outside services and union all
|
|
|
|
// services in querynode package.
|
|
|
|
//
|
|
|
|
// QueryNode implements `types.Component`, `types.QueryNode` interfaces.
|
|
|
|
// `rootCoord` is a grpc client of root coordinator.
|
|
|
|
// `indexCoord` is a grpc client of index coordinator.
|
|
|
|
// `stateCode` is current statement of this query node, indicating whether it's healthy.
|
2020-08-25 15:45:19 +08:00
|
|
|
type QueryNode struct {
|
2020-12-08 14:41:04 +08:00
|
|
|
queryNodeLoopCtx context.Context
|
2020-12-10 16:31:09 +08:00
|
|
|
queryNodeLoopCancel context.CancelFunc
|
2020-10-15 21:31:50 +08:00
|
|
|
|
2021-07-13 14:16:00 +08:00
|
|
|
stateCode atomic.Value
|
2020-08-25 15:45:19 +08:00
|
|
|
|
2021-09-24 21:03:56 +08:00
|
|
|
//call once
|
|
|
|
initOnce sync.Once
|
2021-09-23 18:29:55 +08:00
|
|
|
|
2021-05-28 10:26:30 +08:00
|
|
|
// internal components
|
|
|
|
historical *historical
|
|
|
|
streaming *streaming
|
2020-08-25 15:45:19 +08:00
|
|
|
|
2021-11-06 11:02:58 +08:00
|
|
|
// tSafeReplica
|
|
|
|
tSafeReplica TSafeReplicaInterface
|
|
|
|
|
|
|
|
// dataSyncService
|
|
|
|
dataSyncService *dataSyncService
|
|
|
|
|
2021-01-15 15:28:54 +08:00
|
|
|
// internal services
|
2021-06-23 20:26:10 +08:00
|
|
|
queryService *queryService
|
2021-11-12 18:27:10 +08:00
|
|
|
statsService *statsService
|
|
|
|
|
|
|
|
// segment loader
|
|
|
|
loader *segmentLoader
|
2021-01-18 10:09:17 +08:00
|
|
|
|
2021-01-26 13:41:41 +08:00
|
|
|
// clients
|
2021-06-22 16:44:09 +08:00
|
|
|
rootCoord types.RootCoord
|
|
|
|
indexCoord types.IndexCoord
|
2021-02-08 14:30:54 +08:00
|
|
|
|
|
|
|
msFactory msgstream.Factory
|
2021-04-12 09:18:43 +08:00
|
|
|
scheduler *taskScheduler
|
2021-05-21 19:28:52 +08:00
|
|
|
|
|
|
|
session *sessionutil.Session
|
2021-06-19 11:45:09 +08:00
|
|
|
|
|
|
|
minioKV kv.BaseKV // minio minioKV
|
|
|
|
etcdKV *etcdkv.EtcdKV
|
2020-11-05 10:52:50 +08:00
|
|
|
}
|
2020-09-07 17:01:46 +08:00
|
|
|
|
2021-09-27 19:02:09 +08:00
|
|
|
// NewQueryNode will return a QueryNode with abnormal state.
|
2021-07-13 14:16:00 +08:00
|
|
|
func NewQueryNode(ctx context.Context, factory msgstream.Factory) *QueryNode {
|
2021-01-27 09:50:52 +08:00
|
|
|
ctx1, cancel := context.WithCancel(ctx)
|
|
|
|
node := &QueryNode{
|
|
|
|
queryNodeLoopCtx: ctx1,
|
|
|
|
queryNodeLoopCancel: cancel,
|
2021-06-23 20:26:10 +08:00
|
|
|
queryService: nil,
|
2021-05-28 10:26:30 +08:00
|
|
|
msFactory: factory,
|
2021-01-27 09:50:52 +08:00
|
|
|
}
|
|
|
|
|
2021-04-12 09:18:43 +08:00
|
|
|
node.scheduler = newTaskScheduler(ctx1)
|
2021-03-12 14:22:09 +08:00
|
|
|
node.UpdateStateCode(internalpb.StateCode_Abnormal)
|
2021-01-27 09:50:52 +08:00
|
|
|
|
2021-02-23 11:40:30 +08:00
|
|
|
return node
|
2020-09-15 15:53:10 +08:00
|
|
|
}
|
|
|
|
|
2021-12-15 11:47:10 +08:00
|
|
|
func (node *QueryNode) initSession() error {
|
2021-06-11 22:04:41 +08:00
|
|
|
node.session = sessionutil.NewSession(node.queryNodeLoopCtx, Params.MetaRootPath, Params.EtcdEndpoints)
|
2021-12-15 11:47:10 +08:00
|
|
|
if node.session == nil {
|
|
|
|
return fmt.Errorf("session is nil, the etcd client connection may have failed")
|
|
|
|
}
|
2021-10-14 16:40:35 +08:00
|
|
|
node.session.Init(typeutil.QueryNodeRole, Params.QueryNodeIP+":"+strconv.FormatInt(Params.QueryNodePort, 10), false)
|
2021-12-15 11:47:10 +08:00
|
|
|
Params.QueryNodeID = node.session.ServerID
|
|
|
|
Params.SetLogger(Params.QueryNodeID)
|
|
|
|
log.Debug("QueryNode", zap.Int64("nodeID", Params.QueryNodeID), zap.String("node address", node.session.Address))
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// Register register query node at etcd
|
|
|
|
func (node *QueryNode) Register() error {
|
|
|
|
node.session.Register()
|
2021-10-14 16:40:35 +08:00
|
|
|
// start liveness check
|
|
|
|
go node.session.LivenessCheck(node.queryNodeLoopCtx, func() {
|
2021-10-30 10:24:38 +08:00
|
|
|
log.Error("Query Node disconnected from etcd, process will exit", zap.Int64("Server Id", node.session.ServerID))
|
|
|
|
if err := node.Stop(); err != nil {
|
|
|
|
log.Fatal("failed to stop server", zap.Error(err))
|
|
|
|
}
|
2021-11-22 16:23:17 +08:00
|
|
|
// manually send signal to starter goroutine
|
|
|
|
syscall.Kill(syscall.Getpid(), syscall.SIGINT)
|
2021-10-14 16:40:35 +08:00
|
|
|
})
|
|
|
|
|
2021-09-27 17:37:57 +08:00
|
|
|
//TODO Reset the logger
|
|
|
|
//Params.initLogCfg()
|
2021-05-25 15:06:05 +08:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2021-11-05 09:19:31 +08:00
|
|
|
// InitSegcore set init params of segCore, such as chunckRows, SIMD type...
|
2021-09-15 10:35:52 +08:00
|
|
|
func (node *QueryNode) InitSegcore() {
|
|
|
|
C.SegcoreInit()
|
|
|
|
|
|
|
|
// override segcore chunk size
|
2021-09-15 15:15:52 +08:00
|
|
|
cChunkRows := C.int64_t(Params.ChunkRows)
|
|
|
|
C.SegcoreSetChunkRows(cChunkRows)
|
2021-09-15 12:57:48 +08:00
|
|
|
|
|
|
|
// override segcore SIMD type
|
|
|
|
cSimdType := C.CString(Params.SimdType)
|
2021-09-29 20:50:19 +08:00
|
|
|
cRealSimdType := C.SegcoreSetSimdType(cSimdType)
|
|
|
|
Params.SimdType = C.GoString(cRealSimdType)
|
|
|
|
C.free(unsafe.Pointer(cRealSimdType))
|
2021-09-15 12:57:48 +08:00
|
|
|
C.free(unsafe.Pointer(cSimdType))
|
2021-09-15 10:35:52 +08:00
|
|
|
}
|
|
|
|
|
2021-11-03 23:15:16 +08:00
|
|
|
// Init function init historical and streaming module to manage segments
|
2021-01-21 10:01:29 +08:00
|
|
|
func (node *QueryNode) Init() error {
|
2021-09-24 21:03:56 +08:00
|
|
|
var initError error = nil
|
|
|
|
node.initOnce.Do(func() {
|
|
|
|
//ctx := context.Background()
|
2021-12-15 11:47:10 +08:00
|
|
|
log.Debug("QueryNode session info", zap.String("metaPath", Params.MetaRootPath), zap.Strings("etcdEndPoints", Params.EtcdEndpoints))
|
|
|
|
err := node.initSession()
|
|
|
|
if err != nil {
|
|
|
|
log.Error("QueryNode init session failed", zap.Error(err))
|
|
|
|
initError = err
|
|
|
|
return
|
|
|
|
}
|
2021-09-24 21:03:56 +08:00
|
|
|
connectEtcdFn := func() error {
|
|
|
|
etcdKV, err := etcdkv.NewEtcdKV(Params.EtcdEndpoints, Params.MetaRootPath)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
node.etcdKV = etcdKV
|
2021-06-19 11:45:09 +08:00
|
|
|
return err
|
|
|
|
}
|
2021-09-24 21:03:56 +08:00
|
|
|
log.Debug("queryNode try to connect etcd",
|
|
|
|
zap.Any("EtcdEndpoints", Params.EtcdEndpoints),
|
|
|
|
zap.Any("MetaRootPath", Params.MetaRootPath),
|
|
|
|
)
|
2021-12-15 11:47:10 +08:00
|
|
|
err = retry.Do(node.queryNodeLoopCtx, connectEtcdFn, retry.Attempts(300))
|
2021-09-24 21:03:56 +08:00
|
|
|
if err != nil {
|
|
|
|
log.Debug("queryNode try to connect etcd failed", zap.Error(err))
|
|
|
|
initError = err
|
|
|
|
return
|
|
|
|
}
|
|
|
|
log.Debug("queryNode try to connect etcd success",
|
|
|
|
zap.Any("EtcdEndpoints", Params.EtcdEndpoints),
|
|
|
|
zap.Any("MetaRootPath", Params.MetaRootPath),
|
|
|
|
)
|
2021-12-17 14:41:33 +08:00
|
|
|
node.tSafeReplica = newTSafeReplica()
|
2021-11-06 11:02:58 +08:00
|
|
|
|
|
|
|
streamingReplica := newCollectionReplica(node.etcdKV)
|
|
|
|
historicalReplica := newCollectionReplica(node.etcdKV)
|
2021-09-24 21:03:56 +08:00
|
|
|
|
|
|
|
node.historical = newHistorical(node.queryNodeLoopCtx,
|
2021-11-06 11:02:58 +08:00
|
|
|
historicalReplica,
|
|
|
|
node.etcdKV,
|
|
|
|
node.tSafeReplica,
|
|
|
|
)
|
|
|
|
node.streaming = newStreaming(node.queryNodeLoopCtx,
|
|
|
|
streamingReplica,
|
|
|
|
node.msFactory,
|
|
|
|
node.etcdKV,
|
|
|
|
node.tSafeReplica,
|
|
|
|
)
|
|
|
|
|
2021-11-12 18:27:10 +08:00
|
|
|
node.loader = newSegmentLoader(node.queryNodeLoopCtx,
|
|
|
|
node.rootCoord,
|
|
|
|
node.indexCoord,
|
|
|
|
node.historical.replica,
|
|
|
|
node.streaming.replica,
|
2021-11-21 07:33:14 +08:00
|
|
|
node.etcdKV,
|
|
|
|
node.msFactory)
|
2021-11-12 18:27:10 +08:00
|
|
|
|
|
|
|
node.statsService = newStatsService(node.queryNodeLoopCtx, node.historical.replica, node.loader.indexLoader.fieldStatsChan, node.msFactory)
|
2021-11-06 11:02:58 +08:00
|
|
|
node.dataSyncService = newDataSyncService(node.queryNodeLoopCtx, streamingReplica, historicalReplica, node.tSafeReplica, node.msFactory)
|
2021-09-24 21:03:56 +08:00
|
|
|
|
|
|
|
node.InitSegcore()
|
|
|
|
|
|
|
|
if node.rootCoord == nil {
|
2021-11-12 18:27:10 +08:00
|
|
|
initError = errors.New("null root coordinator detected when queryNode init")
|
|
|
|
return
|
2021-09-24 21:03:56 +08:00
|
|
|
}
|
2021-01-30 16:02:10 +08:00
|
|
|
|
2021-09-24 21:03:56 +08:00
|
|
|
if node.indexCoord == nil {
|
2021-11-12 18:27:10 +08:00
|
|
|
initError = errors.New("null index coordinator detected when queryNode init")
|
|
|
|
return
|
2021-09-24 21:03:56 +08:00
|
|
|
}
|
2021-11-12 18:27:10 +08:00
|
|
|
|
|
|
|
log.Debug("query node init successfully",
|
|
|
|
zap.Any("queryNodeID", Params.QueryNodeID),
|
|
|
|
zap.Any("IP", Params.QueryNodeIP),
|
|
|
|
zap.Any("Port", Params.QueryNodePort),
|
|
|
|
)
|
2021-12-15 11:47:10 +08:00
|
|
|
// This param needs valid QueryNodeID
|
|
|
|
Params.initMsgChannelSubName()
|
2021-09-24 21:03:56 +08:00
|
|
|
})
|
2021-01-26 13:41:41 +08:00
|
|
|
|
2021-09-24 21:03:56 +08:00
|
|
|
return initError
|
2021-01-30 16:02:10 +08:00
|
|
|
}
|
|
|
|
|
2021-11-03 23:48:17 +08:00
|
|
|
// Start mainly start QueryNode's query service.
|
2021-01-30 16:02:10 +08:00
|
|
|
func (node *QueryNode) Start() error {
|
2021-02-08 14:30:54 +08:00
|
|
|
var err error
|
|
|
|
m := map[string]interface{}{
|
|
|
|
"PulsarAddress": Params.PulsarAddress,
|
|
|
|
"ReceiveBufSize": 1024,
|
|
|
|
"PulsarBufSize": 1024}
|
|
|
|
err = node.msFactory.SetParams(m)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
2021-01-19 11:37:16 +08:00
|
|
|
// init services and manager
|
2021-05-28 10:26:30 +08:00
|
|
|
// TODO: pass node.streaming.replica to search service
|
2021-06-23 20:26:10 +08:00
|
|
|
node.queryService = newQueryService(node.queryNodeLoopCtx,
|
2021-06-15 12:41:40 +08:00
|
|
|
node.historical,
|
|
|
|
node.streaming,
|
2021-05-28 15:40:32 +08:00
|
|
|
node.msFactory)
|
2020-09-15 15:53:10 +08:00
|
|
|
|
2021-04-12 09:18:43 +08:00
|
|
|
// start task scheduler
|
|
|
|
go node.scheduler.Start()
|
|
|
|
|
2021-01-19 11:37:16 +08:00
|
|
|
// start services
|
2021-05-28 10:26:30 +08:00
|
|
|
go node.historical.start()
|
2021-10-20 17:54:43 +08:00
|
|
|
go node.watchChangeInfo()
|
2021-11-12 18:27:10 +08:00
|
|
|
go node.statsService.start()
|
2021-09-23 18:29:55 +08:00
|
|
|
|
2021-10-12 19:56:34 +08:00
|
|
|
Params.CreatedTime = time.Now()
|
|
|
|
Params.UpdatedTime = time.Now()
|
|
|
|
|
2021-03-12 14:22:09 +08:00
|
|
|
node.UpdateStateCode(internalpb.StateCode_Healthy)
|
2021-11-12 18:27:10 +08:00
|
|
|
log.Debug("query node start successfully",
|
|
|
|
zap.Any("queryNodeID", Params.QueryNodeID),
|
|
|
|
zap.Any("IP", Params.QueryNodeIP),
|
|
|
|
zap.Any("Port", Params.QueryNodePort),
|
|
|
|
)
|
2021-01-21 10:01:29 +08:00
|
|
|
return nil
|
2020-11-05 10:52:50 +08:00
|
|
|
}
|
2020-09-15 15:53:10 +08:00
|
|
|
|
2021-11-03 23:48:17 +08:00
|
|
|
// Stop mainly stop QueryNode's query service, historical loop and streaming loop.
|
2021-01-21 10:01:29 +08:00
|
|
|
func (node *QueryNode) Stop() error {
|
2021-03-12 14:22:09 +08:00
|
|
|
node.UpdateStateCode(internalpb.StateCode_Abnormal)
|
2020-12-08 14:41:04 +08:00
|
|
|
node.queryNodeLoopCancel()
|
|
|
|
|
2020-11-24 16:12:39 +08:00
|
|
|
// close services
|
2021-11-06 11:02:58 +08:00
|
|
|
if node.dataSyncService != nil {
|
|
|
|
node.dataSyncService.close()
|
|
|
|
}
|
2021-05-28 10:26:30 +08:00
|
|
|
if node.historical != nil {
|
|
|
|
node.historical.close()
|
|
|
|
}
|
|
|
|
if node.streaming != nil {
|
|
|
|
node.streaming.close()
|
2020-11-24 16:12:39 +08:00
|
|
|
}
|
2021-06-23 20:26:10 +08:00
|
|
|
if node.queryService != nil {
|
|
|
|
node.queryService.close()
|
2020-11-24 16:12:39 +08:00
|
|
|
}
|
2021-11-12 18:27:10 +08:00
|
|
|
if node.statsService != nil {
|
|
|
|
node.statsService.close()
|
|
|
|
}
|
2021-11-16 22:31:14 +08:00
|
|
|
node.session.Revoke(time.Second)
|
2021-01-21 10:01:29 +08:00
|
|
|
return nil
|
2021-01-19 11:37:16 +08:00
|
|
|
}
|
|
|
|
|
2021-10-28 23:52:42 +08:00
|
|
|
// UpdateStateCode updata the state of query node, which can be initializing, healthy, and abnormal
|
2021-03-12 14:22:09 +08:00
|
|
|
func (node *QueryNode) UpdateStateCode(code internalpb.StateCode) {
|
2021-02-23 11:40:30 +08:00
|
|
|
node.stateCode.Store(code)
|
|
|
|
}
|
|
|
|
|
2021-11-03 23:48:17 +08:00
|
|
|
// SetRootCoord assigns parameter rc to its member rootCoord.
|
2021-06-21 17:28:03 +08:00
|
|
|
func (node *QueryNode) SetRootCoord(rc types.RootCoord) error {
|
|
|
|
if rc == nil {
|
2021-06-22 16:44:09 +08:00
|
|
|
return errors.New("null root coordinator interface")
|
2021-01-27 14:41:56 +08:00
|
|
|
}
|
2021-06-21 17:28:03 +08:00
|
|
|
node.rootCoord = rc
|
2021-01-27 14:41:56 +08:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2021-11-03 23:48:17 +08:00
|
|
|
// SetIndexCoord assigns parameter index to its member indexCoord.
|
2021-06-21 17:28:03 +08:00
|
|
|
func (node *QueryNode) SetIndexCoord(index types.IndexCoord) error {
|
2021-01-26 13:41:41 +08:00
|
|
|
if index == nil {
|
2021-06-22 16:44:09 +08:00
|
|
|
return errors.New("null index coordinator interface")
|
2021-01-26 13:41:41 +08:00
|
|
|
}
|
2021-06-21 17:28:03 +08:00
|
|
|
node.indexCoord = index
|
2021-01-26 13:41:41 +08:00
|
|
|
return nil
|
|
|
|
}
|
2021-10-20 17:54:43 +08:00
|
|
|
|
|
|
|
func (node *QueryNode) watchChangeInfo() {
|
|
|
|
log.Debug("query node watchChangeInfo start")
|
2021-11-17 12:13:10 +08:00
|
|
|
watchChan := node.etcdKV.WatchWithPrefix(util.ChangeInfoMetaPrefix)
|
2021-10-20 17:54:43 +08:00
|
|
|
for {
|
|
|
|
select {
|
|
|
|
case <-node.queryNodeLoopCtx.Done():
|
|
|
|
log.Debug("query node watchChangeInfo close")
|
|
|
|
return
|
|
|
|
case resp := <-watchChan:
|
|
|
|
for _, event := range resp.Events {
|
|
|
|
switch event.Type {
|
|
|
|
case mvccpb.PUT:
|
|
|
|
infoID, err := strconv.ParseInt(filepath.Base(string(event.Kv.Key)), 10, 64)
|
|
|
|
if err != nil {
|
|
|
|
log.Warn("Parse SealedSegmentsChangeInfo id failed", zap.Any("error", err.Error()))
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
log.Debug("get SealedSegmentsChangeInfo from etcd",
|
|
|
|
zap.Any("infoID", infoID),
|
|
|
|
)
|
|
|
|
info := &querypb.SealedSegmentsChangeInfo{}
|
|
|
|
err = proto.Unmarshal(event.Kv.Value, info)
|
|
|
|
if err != nil {
|
|
|
|
log.Warn("Unmarshal SealedSegmentsChangeInfo failed", zap.Any("error", err.Error()))
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
go func() {
|
2021-11-17 12:13:10 +08:00
|
|
|
err = node.removeSegments(info)
|
2021-10-20 17:54:43 +08:00
|
|
|
if err != nil {
|
2021-11-17 12:13:10 +08:00
|
|
|
log.Warn("cleanup segments failed", zap.Any("error", err.Error()))
|
2021-10-20 17:54:43 +08:00
|
|
|
}
|
|
|
|
}()
|
|
|
|
default:
|
|
|
|
// do nothing
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-10-26 15:18:22 +08:00
|
|
|
func (node *QueryNode) waitChangeInfo(segmentChangeInfos *querypb.SealedSegmentsChangeInfo) error {
|
2021-10-20 17:54:43 +08:00
|
|
|
fn := func() error {
|
2021-10-26 15:18:22 +08:00
|
|
|
for _, info := range segmentChangeInfos.Infos {
|
|
|
|
canDoLoadBalance := true
|
2021-11-17 12:13:10 +08:00
|
|
|
// make sure all query channel already received segment location changes
|
2021-10-26 15:18:22 +08:00
|
|
|
// Check online segments:
|
|
|
|
for _, segmentInfo := range info.OnlineSegments {
|
|
|
|
if node.queryService.hasQueryCollection(segmentInfo.CollectionID) {
|
|
|
|
qc, err := node.queryService.getQueryCollection(segmentInfo.CollectionID)
|
|
|
|
if err != nil {
|
|
|
|
canDoLoadBalance = false
|
|
|
|
break
|
|
|
|
}
|
2021-11-17 12:13:10 +08:00
|
|
|
if info.OnlineNodeID == Params.QueryNodeID && !qc.globalSegmentManager.hasGlobalSealedSegment(segmentInfo.SegmentID) {
|
2021-10-26 15:18:22 +08:00
|
|
|
canDoLoadBalance = false
|
|
|
|
break
|
|
|
|
}
|
2021-10-20 17:54:43 +08:00
|
|
|
}
|
|
|
|
}
|
2021-10-26 15:18:22 +08:00
|
|
|
// Check offline segments:
|
|
|
|
for _, segmentInfo := range info.OfflineSegments {
|
|
|
|
if node.queryService.hasQueryCollection(segmentInfo.CollectionID) {
|
|
|
|
qc, err := node.queryService.getQueryCollection(segmentInfo.CollectionID)
|
|
|
|
if err != nil {
|
|
|
|
canDoLoadBalance = false
|
|
|
|
break
|
|
|
|
}
|
2021-11-17 12:13:10 +08:00
|
|
|
if info.OfflineNodeID == Params.QueryNodeID && qc.globalSegmentManager.hasGlobalSealedSegment(segmentInfo.SegmentID) {
|
2021-10-26 15:18:22 +08:00
|
|
|
canDoLoadBalance = false
|
|
|
|
break
|
|
|
|
}
|
2021-10-20 17:54:43 +08:00
|
|
|
}
|
|
|
|
}
|
2021-10-26 15:18:22 +08:00
|
|
|
if canDoLoadBalance {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
return errors.New(fmt.Sprintln("waitChangeInfo failed, infoID = ", segmentChangeInfos.Base.GetMsgID()))
|
2021-10-20 17:54:43 +08:00
|
|
|
}
|
2021-10-26 15:18:22 +08:00
|
|
|
|
|
|
|
return nil
|
2021-10-20 17:54:43 +08:00
|
|
|
}
|
|
|
|
|
2021-12-03 15:15:32 +08:00
|
|
|
return retry.Do(node.queryNodeLoopCtx, fn, retry.Attempts(50))
|
2021-10-20 17:54:43 +08:00
|
|
|
}
|
|
|
|
|
2021-11-17 12:13:10 +08:00
|
|
|
// remove the segments since it's already compacted or balanced to other querynodes
|
|
|
|
func (node *QueryNode) removeSegments(segmentChangeInfos *querypb.SealedSegmentsChangeInfo) error {
|
2021-10-26 15:18:22 +08:00
|
|
|
err := node.waitChangeInfo(segmentChangeInfos)
|
2021-10-20 17:54:43 +08:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
2021-10-29 17:12:43 +08:00
|
|
|
node.streaming.replica.queryLock()
|
|
|
|
node.historical.replica.queryLock()
|
|
|
|
defer node.streaming.replica.queryUnlock()
|
|
|
|
defer node.historical.replica.queryUnlock()
|
2021-10-26 15:18:22 +08:00
|
|
|
for _, info := range segmentChangeInfos.Infos {
|
|
|
|
// For online segments:
|
|
|
|
for _, segmentInfo := range info.OnlineSegments {
|
|
|
|
// delete growing segment because these segments are loaded in historical.
|
|
|
|
hasGrowingSegment := node.streaming.replica.hasSegment(segmentInfo.SegmentID)
|
|
|
|
if hasGrowingSegment {
|
|
|
|
err := node.streaming.replica.removeSegment(segmentInfo.SegmentID)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2021-11-17 12:13:10 +08:00
|
|
|
log.Debug("remove growing segment in removeSegments",
|
2021-10-26 15:18:22 +08:00
|
|
|
zap.Any("collectionID", segmentInfo.CollectionID),
|
|
|
|
zap.Any("segmentID", segmentInfo.SegmentID),
|
|
|
|
zap.Any("infoID", segmentChangeInfos.Base.GetMsgID()),
|
|
|
|
)
|
2021-10-20 17:54:43 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-10-26 15:18:22 +08:00
|
|
|
// For offline segments:
|
2021-11-17 12:13:10 +08:00
|
|
|
for _, segmentInfo := range info.OfflineSegments {
|
2021-10-26 15:18:22 +08:00
|
|
|
// load balance or compaction, remove old sealed segments.
|
|
|
|
if info.OfflineNodeID == Params.QueryNodeID {
|
2021-11-17 12:13:10 +08:00
|
|
|
err := node.historical.replica.removeSegment(segmentInfo.SegmentID)
|
2021-10-26 15:18:22 +08:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2021-11-17 12:13:10 +08:00
|
|
|
log.Debug("remove sealed segment", zap.Any("collectionID", segmentInfo.CollectionID),
|
|
|
|
zap.Any("segmentID", segmentInfo.SegmentID),
|
|
|
|
zap.Any("infoID", segmentChangeInfos.Base.GetMsgID()),
|
|
|
|
)
|
2021-10-20 17:54:43 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|