2021-12-09 14:37:22 +08:00
|
|
|
// Licensed to the LF AI & Data foundation under one
|
|
|
|
// or more contributor license agreements. See the NOTICE file
|
|
|
|
// distributed with this work for additional information
|
|
|
|
// regarding copyright ownership. The ASF licenses this file
|
|
|
|
// to you under the Apache License, Version 2.0 (the
|
|
|
|
// "License"); you may not use this file except in compliance
|
2021-04-19 13:47:10 +08:00
|
|
|
// with the License. You may obtain a copy of the License at
|
|
|
|
//
|
2021-12-09 14:37:22 +08:00
|
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
2021-04-19 13:47:10 +08:00
|
|
|
//
|
2021-12-09 14:37:22 +08:00
|
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
// See the License for the specific language governing permissions and
|
|
|
|
// limitations under the License.
|
2021-04-19 13:47:10 +08:00
|
|
|
|
2021-01-16 10:12:14 +08:00
|
|
|
package querynode
|
2020-08-25 15:45:19 +08:00
|
|
|
|
2020-09-02 10:38:08 +08:00
|
|
|
/*
|
2022-09-21 20:16:51 +08:00
|
|
|
#cgo pkg-config: milvus_segcore milvus_common
|
2020-09-02 10:38:08 +08:00
|
|
|
|
2020-11-25 10:31:51 +08:00
|
|
|
#include "segcore/collection_c.h"
|
|
|
|
#include "segcore/segment_c.h"
|
2021-04-16 14:02:49 +08:00
|
|
|
#include "segcore/segcore_init_c.h"
|
2022-09-21 20:16:51 +08:00
|
|
|
#include "common/init_c.h"
|
2020-09-02 10:38:08 +08:00
|
|
|
|
|
|
|
*/
|
2020-08-25 15:45:19 +08:00
|
|
|
import "C"
|
2020-09-02 10:38:08 +08:00
|
|
|
|
2020-08-25 15:45:19 +08:00
|
|
|
import (
|
2020-10-15 21:31:50 +08:00
|
|
|
"context"
|
2021-10-20 17:54:43 +08:00
|
|
|
"fmt"
|
2022-03-17 17:17:22 +08:00
|
|
|
"os"
|
2022-05-03 08:39:49 +08:00
|
|
|
"path"
|
2022-08-09 16:34:37 +08:00
|
|
|
"runtime"
|
2022-11-04 22:25:01 +08:00
|
|
|
"runtime/debug"
|
2021-09-24 21:03:56 +08:00
|
|
|
"sync"
|
2021-11-22 16:23:17 +08:00
|
|
|
"syscall"
|
2021-10-12 19:56:34 +08:00
|
|
|
"time"
|
2021-09-15 12:57:48 +08:00
|
|
|
"unsafe"
|
2021-06-21 18:22:13 +08:00
|
|
|
|
2023-02-26 11:31:49 +08:00
|
|
|
"github.com/milvus-io/milvus/internal/mq/msgdispatcher"
|
|
|
|
|
2022-10-16 20:49:27 +08:00
|
|
|
"github.com/milvus-io/milvus-proto/go-api/commonpb"
|
2021-07-13 14:16:00 +08:00
|
|
|
etcdkv "github.com/milvus-io/milvus/internal/kv/etcd"
|
2021-04-22 14:45:57 +08:00
|
|
|
"github.com/milvus-io/milvus/internal/log"
|
2022-03-17 18:03:23 +08:00
|
|
|
"github.com/milvus-io/milvus/internal/storage"
|
2021-04-22 14:45:57 +08:00
|
|
|
"github.com/milvus-io/milvus/internal/types"
|
2023-02-28 14:19:47 +08:00
|
|
|
"github.com/milvus-io/milvus/internal/util/conc"
|
2022-04-07 22:05:32 +08:00
|
|
|
"github.com/milvus-io/milvus/internal/util/dependency"
|
2022-11-04 22:25:01 +08:00
|
|
|
"github.com/milvus-io/milvus/internal/util/gc"
|
2022-11-03 19:29:36 +08:00
|
|
|
"github.com/milvus-io/milvus/internal/util/hardware"
|
2022-09-21 20:16:51 +08:00
|
|
|
"github.com/milvus-io/milvus/internal/util/initcore"
|
2023-01-29 17:45:49 +08:00
|
|
|
"github.com/milvus-io/milvus/internal/util/lifetime"
|
2022-09-21 20:16:51 +08:00
|
|
|
"github.com/milvus-io/milvus/internal/util/metricsinfo"
|
2021-12-23 18:39:11 +08:00
|
|
|
"github.com/milvus-io/milvus/internal/util/paramtable"
|
2021-05-21 19:28:52 +08:00
|
|
|
"github.com/milvus-io/milvus/internal/util/sessionutil"
|
|
|
|
"github.com/milvus-io/milvus/internal/util/typeutil"
|
2022-12-16 10:03:27 +08:00
|
|
|
"github.com/samber/lo"
|
|
|
|
clientv3 "go.etcd.io/etcd/client/v3"
|
|
|
|
"go.uber.org/zap"
|
2020-08-25 15:45:19 +08:00
|
|
|
)
|
|
|
|
|
2021-10-05 12:42:04 +08:00
|
|
|
// make sure QueryNode implements types.QueryNode
|
|
|
|
var _ types.QueryNode = (*QueryNode)(nil)
|
|
|
|
|
2021-10-11 17:16:30 +08:00
|
|
|
// make sure QueryNode implements types.QueryNodeComponent
|
|
|
|
var _ types.QueryNodeComponent = (*QueryNode)(nil)
|
2021-10-05 12:42:04 +08:00
|
|
|
|
2022-11-04 14:25:38 +08:00
|
|
|
var Params *paramtable.ComponentParam = paramtable.Get()
|
2021-12-23 18:39:11 +08:00
|
|
|
|
2022-09-16 09:56:47 +08:00
|
|
|
// rateCol is global rateCollector in QueryNode.
|
|
|
|
var rateCol *rateCollector
|
|
|
|
|
2021-09-26 20:16:12 +08:00
|
|
|
// QueryNode communicates with outside services and union all
|
|
|
|
// services in querynode package.
|
|
|
|
//
|
|
|
|
// QueryNode implements `types.Component`, `types.QueryNode` interfaces.
|
2022-12-02 00:33:21 +08:00
|
|
|
//
|
|
|
|
// `rootCoord` is a grpc client of root coordinator.
|
|
|
|
// `indexCoord` is a grpc client of index coordinator.
|
|
|
|
// `stateCode` is current statement of this query node, indicating whether it's healthy.
|
2020-08-25 15:45:19 +08:00
|
|
|
type QueryNode struct {
|
2020-12-08 14:41:04 +08:00
|
|
|
queryNodeLoopCtx context.Context
|
2020-12-10 16:31:09 +08:00
|
|
|
queryNodeLoopCancel context.CancelFunc
|
2020-10-15 21:31:50 +08:00
|
|
|
|
2023-01-29 17:45:49 +08:00
|
|
|
lifetime lifetime.Lifetime[commonpb.StateCode]
|
2022-01-17 14:41:35 +08:00
|
|
|
|
2023-01-29 17:45:49 +08:00
|
|
|
stopOnce sync.Once
|
2020-08-25 15:45:19 +08:00
|
|
|
|
2021-09-24 21:03:56 +08:00
|
|
|
//call once
|
|
|
|
initOnce sync.Once
|
2021-09-23 18:29:55 +08:00
|
|
|
|
2021-05-28 10:26:30 +08:00
|
|
|
// internal components
|
2022-05-31 13:42:03 +08:00
|
|
|
metaReplica ReplicaInterface
|
2020-08-25 15:45:19 +08:00
|
|
|
|
2021-11-06 11:02:58 +08:00
|
|
|
// tSafeReplica
|
|
|
|
tSafeReplica TSafeReplicaInterface
|
|
|
|
|
|
|
|
// dataSyncService
|
|
|
|
dataSyncService *dataSyncService
|
|
|
|
|
2021-11-12 18:27:10 +08:00
|
|
|
// segment loader
|
|
|
|
loader *segmentLoader
|
2021-01-18 10:09:17 +08:00
|
|
|
|
2021-12-29 14:35:21 +08:00
|
|
|
// etcd client
|
|
|
|
etcdCli *clientv3.Client
|
2022-11-04 14:25:38 +08:00
|
|
|
address string
|
2021-12-29 14:35:21 +08:00
|
|
|
|
2023-02-13 16:38:33 +08:00
|
|
|
dispClient msgdispatcher.Client
|
|
|
|
factory dependency.Factory
|
|
|
|
scheduler *taskScheduler
|
2021-05-21 19:28:52 +08:00
|
|
|
|
2023-01-12 19:49:40 +08:00
|
|
|
sessionMu sync.Mutex
|
|
|
|
session *sessionutil.Session
|
|
|
|
eventCh <-chan *sessionutil.SessionEvent
|
2021-06-19 11:45:09 +08:00
|
|
|
|
2022-04-07 22:05:32 +08:00
|
|
|
vectorStorage storage.ChunkManager
|
|
|
|
etcdKV *etcdkv.EtcdKV
|
2022-04-20 16:15:41 +08:00
|
|
|
|
|
|
|
// shard cluster service, handle shard leader functions
|
|
|
|
ShardClusterService *ShardClusterService
|
|
|
|
//shard query service, handles shard-level query & search
|
|
|
|
queryShardService *queryShardService
|
2022-08-09 16:34:37 +08:00
|
|
|
|
2022-10-31 13:55:33 +08:00
|
|
|
// pool for load/release channel
|
2023-02-28 14:19:47 +08:00
|
|
|
taskPool *conc.Pool
|
2023-01-10 21:09:38 +08:00
|
|
|
|
|
|
|
IsStandAlone bool
|
|
|
|
}
|
|
|
|
|
|
|
|
var queryNode *QueryNode = nil
|
|
|
|
|
|
|
|
func GetQueryNode() *QueryNode {
|
|
|
|
return queryNode
|
2020-11-05 10:52:50 +08:00
|
|
|
}
|
2020-09-07 17:01:46 +08:00
|
|
|
|
2021-09-27 19:02:09 +08:00
|
|
|
// NewQueryNode will return a QueryNode with abnormal state.
|
2022-04-07 22:05:32 +08:00
|
|
|
func NewQueryNode(ctx context.Context, factory dependency.Factory) *QueryNode {
|
2021-01-27 09:50:52 +08:00
|
|
|
ctx1, cancel := context.WithCancel(ctx)
|
2023-01-10 21:09:38 +08:00
|
|
|
|
|
|
|
queryNode = &QueryNode{
|
2021-01-27 09:50:52 +08:00
|
|
|
queryNodeLoopCtx: ctx1,
|
|
|
|
queryNodeLoopCancel: cancel,
|
2022-04-07 22:05:32 +08:00
|
|
|
factory: factory,
|
2023-01-10 21:09:38 +08:00
|
|
|
IsStandAlone: os.Getenv(metricsinfo.DeployModeEnvKey) == metricsinfo.StandaloneDeployMode,
|
2023-01-29 17:45:49 +08:00
|
|
|
lifetime: lifetime.NewLifetime(commonpb.StateCode_Abnormal),
|
2021-01-27 09:50:52 +08:00
|
|
|
}
|
|
|
|
|
2023-01-10 21:09:38 +08:00
|
|
|
queryNode.tSafeReplica = newTSafeReplica()
|
|
|
|
queryNode.scheduler = newTaskScheduler(ctx1, queryNode.tSafeReplica)
|
2021-01-27 09:50:52 +08:00
|
|
|
|
2023-01-10 21:09:38 +08:00
|
|
|
return queryNode
|
2020-09-15 15:53:10 +08:00
|
|
|
}
|
|
|
|
|
2021-12-15 11:47:10 +08:00
|
|
|
func (node *QueryNode) initSession() error {
|
2022-11-17 18:59:09 +08:00
|
|
|
node.session = sessionutil.NewSession(node.queryNodeLoopCtx, Params.EtcdCfg.MetaRootPath.GetValue(), node.etcdCli)
|
2021-12-15 11:47:10 +08:00
|
|
|
if node.session == nil {
|
|
|
|
return fmt.Errorf("session is nil, the etcd client connection may have failed")
|
|
|
|
}
|
2022-11-04 14:25:38 +08:00
|
|
|
node.session.Init(typeutil.QueryNodeRole, node.address, false, true)
|
2021-12-15 11:47:10 +08:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// Register register query node at etcd
|
|
|
|
func (node *QueryNode) Register() error {
|
|
|
|
node.session.Register()
|
2021-10-14 16:40:35 +08:00
|
|
|
// start liveness check
|
|
|
|
go node.session.LivenessCheck(node.queryNodeLoopCtx, func() {
|
2022-11-04 14:25:38 +08:00
|
|
|
log.Error("Query Node disconnected from etcd, process will exit", zap.Int64("Server Id", paramtable.GetNodeID()))
|
2021-10-30 10:24:38 +08:00
|
|
|
if err := node.Stop(); err != nil {
|
|
|
|
log.Fatal("failed to stop server", zap.Error(err))
|
|
|
|
}
|
2021-11-22 16:23:17 +08:00
|
|
|
// manually send signal to starter goroutine
|
2021-12-29 14:35:21 +08:00
|
|
|
if node.session.TriggerKill {
|
2022-03-17 17:17:22 +08:00
|
|
|
if p, err := os.FindProcess(os.Getpid()); err == nil {
|
|
|
|
p.Signal(syscall.SIGINT)
|
|
|
|
}
|
2021-12-29 14:35:21 +08:00
|
|
|
}
|
2021-10-14 16:40:35 +08:00
|
|
|
})
|
|
|
|
|
2021-09-27 17:37:57 +08:00
|
|
|
//TODO Reset the logger
|
|
|
|
//Params.initLogCfg()
|
2021-05-25 15:06:05 +08:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2022-09-16 09:56:47 +08:00
|
|
|
// initRateCollector creates and starts rateCollector in QueryNode.
|
|
|
|
func (node *QueryNode) initRateCollector() error {
|
|
|
|
var err error
|
|
|
|
rateCol, err = newRateCollector()
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
rateCol.Register(metricsinfo.NQPerSecond)
|
|
|
|
rateCol.Register(metricsinfo.SearchThroughput)
|
|
|
|
rateCol.Register(metricsinfo.InsertConsumeThroughput)
|
|
|
|
rateCol.Register(metricsinfo.DeleteConsumeThroughput)
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2021-11-05 09:19:31 +08:00
|
|
|
// InitSegcore set init params of segCore, such as chunckRows, SIMD type...
|
2021-09-15 10:35:52 +08:00
|
|
|
func (node *QueryNode) InitSegcore() {
|
2022-05-03 08:39:49 +08:00
|
|
|
cEasyloggingYaml := C.CString(path.Join(Params.BaseTable.GetConfigDir(), paramtable.DefaultEasyloggingYaml))
|
|
|
|
C.SegcoreInit(cEasyloggingYaml)
|
|
|
|
C.free(unsafe.Pointer(cEasyloggingYaml))
|
2021-09-15 10:35:52 +08:00
|
|
|
|
2022-12-02 00:33:21 +08:00
|
|
|
cpuNum := runtime.GOMAXPROCS(0)
|
|
|
|
C.SegcoreSetThreadPoolNum(C.uint32_t(cpuNum))
|
|
|
|
|
2021-09-15 10:35:52 +08:00
|
|
|
// override segcore chunk size
|
2022-12-07 18:01:19 +08:00
|
|
|
cChunkRows := C.int64_t(Params.QueryNodeCfg.ChunkRows.GetAsInt64())
|
2021-09-15 15:15:52 +08:00
|
|
|
C.SegcoreSetChunkRows(cChunkRows)
|
2021-09-15 12:57:48 +08:00
|
|
|
|
2022-12-07 18:01:19 +08:00
|
|
|
nlist := C.int64_t(Params.QueryNodeCfg.SmallIndexNlist.GetAsInt64())
|
2022-05-05 09:49:50 +08:00
|
|
|
C.SegcoreSetNlist(nlist)
|
|
|
|
|
2022-12-07 18:01:19 +08:00
|
|
|
nprobe := C.int64_t(Params.QueryNodeCfg.SmallIndexNProbe.GetAsInt64())
|
2022-05-05 09:49:50 +08:00
|
|
|
C.SegcoreSetNprobe(nprobe)
|
|
|
|
|
2021-09-15 12:57:48 +08:00
|
|
|
// override segcore SIMD type
|
2022-12-07 18:01:19 +08:00
|
|
|
cSimdType := C.CString(Params.CommonCfg.SimdType.GetValue())
|
2022-11-04 14:25:38 +08:00
|
|
|
C.SegcoreSetSimdType(cSimdType)
|
2021-09-15 12:57:48 +08:00
|
|
|
C.free(unsafe.Pointer(cSimdType))
|
2022-04-08 20:29:33 +08:00
|
|
|
|
|
|
|
// override segcore index slice size
|
2022-12-07 18:01:19 +08:00
|
|
|
cIndexSliceSize := C.int64_t(Params.CommonCfg.IndexSliceSize.GetAsInt64())
|
2022-10-16 21:17:25 +08:00
|
|
|
C.InitIndexSliceSize(cIndexSliceSize)
|
2022-09-21 20:16:51 +08:00
|
|
|
|
2022-12-07 18:01:19 +08:00
|
|
|
cThreadCoreCoefficient := C.int64_t(Params.CommonCfg.ThreadCoreCoefficient.GetAsInt64())
|
2022-11-03 14:39:40 +08:00
|
|
|
C.InitThreadCoreCoefficient(cThreadCoreCoefficient)
|
|
|
|
|
2022-11-17 18:59:09 +08:00
|
|
|
cCPUNum := C.int(hardware.GetCPUNum())
|
|
|
|
C.InitCpuNum(cCPUNum)
|
2022-11-03 19:29:36 +08:00
|
|
|
|
2023-03-14 23:21:56 +08:00
|
|
|
// init GPU resource
|
|
|
|
cGpuId := C.int32_t(0)
|
|
|
|
cResNum := C.int32_t(1)
|
|
|
|
C.SegcoreInitGPU(cGpuId, cResNum)
|
|
|
|
|
2022-11-04 14:25:38 +08:00
|
|
|
initcore.InitLocalStorageConfig(Params)
|
2023-03-10 15:51:52 +08:00
|
|
|
|
|
|
|
mmapDirPath := paramtable.Get().QueryNodeCfg.MmapDirPath.GetValue()
|
|
|
|
if len(mmapDirPath) > 0 {
|
|
|
|
log.Info("mmap enabled", zap.String("dir", mmapDirPath))
|
|
|
|
}
|
2021-09-15 10:35:52 +08:00
|
|
|
}
|
|
|
|
|
2021-11-03 23:15:16 +08:00
|
|
|
// Init function init historical and streaming module to manage segments
|
2021-01-21 10:01:29 +08:00
|
|
|
func (node *QueryNode) Init() error {
|
2022-11-04 14:25:38 +08:00
|
|
|
var initError error
|
2021-09-24 21:03:56 +08:00
|
|
|
node.initOnce.Do(func() {
|
|
|
|
//ctx := context.Background()
|
2022-11-17 18:59:09 +08:00
|
|
|
log.Info("QueryNode session info", zap.String("metaPath", Params.EtcdCfg.MetaRootPath.GetValue()))
|
2021-12-15 11:47:10 +08:00
|
|
|
err := node.initSession()
|
|
|
|
if err != nil {
|
|
|
|
log.Error("QueryNode init session failed", zap.Error(err))
|
|
|
|
initError = err
|
|
|
|
return
|
|
|
|
}
|
2021-12-29 14:35:21 +08:00
|
|
|
|
2022-11-04 14:25:38 +08:00
|
|
|
node.factory.Init(Params)
|
2022-03-17 18:03:23 +08:00
|
|
|
|
2022-09-16 09:56:47 +08:00
|
|
|
err = node.initRateCollector()
|
|
|
|
if err != nil {
|
2022-11-04 14:25:38 +08:00
|
|
|
log.Error("QueryNode init rateCollector failed", zap.Int64("nodeID", paramtable.GetNodeID()), zap.Error(err))
|
2022-09-16 09:56:47 +08:00
|
|
|
initError = err
|
|
|
|
return
|
|
|
|
}
|
2022-11-04 14:25:38 +08:00
|
|
|
log.Info("QueryNode init rateCollector done", zap.Int64("nodeID", paramtable.GetNodeID()))
|
2022-09-16 09:56:47 +08:00
|
|
|
|
2023-02-13 16:38:33 +08:00
|
|
|
node.dispClient = msgdispatcher.NewClient(node.factory, typeutil.QueryNodeRole, paramtable.GetNodeID())
|
|
|
|
log.Info("QueryNode init dispatcher client done", zap.Int64("nodeID", paramtable.GetNodeID()))
|
|
|
|
|
2022-09-23 14:40:51 +08:00
|
|
|
node.vectorStorage, err = node.factory.NewPersistentStorageChunkManager(node.queryNodeLoopCtx)
|
2022-03-17 18:03:23 +08:00
|
|
|
if err != nil {
|
2022-04-07 22:05:32 +08:00
|
|
|
log.Error("QueryNode init vector storage failed", zap.Error(err))
|
|
|
|
initError = err
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
2022-11-17 18:59:09 +08:00
|
|
|
node.etcdKV = etcdkv.NewEtcdKV(node.etcdCli, Params.EtcdCfg.MetaRootPath.GetValue())
|
2022-05-07 10:27:51 +08:00
|
|
|
log.Info("queryNode try to connect etcd success", zap.Any("MetaRootPath", Params.EtcdCfg.MetaRootPath))
|
2021-11-06 11:02:58 +08:00
|
|
|
|
2023-02-28 14:19:47 +08:00
|
|
|
node.taskPool = conc.NewDefaultPool()
|
2022-12-02 00:33:21 +08:00
|
|
|
node.metaReplica = newCollectionReplica()
|
2022-03-17 18:03:23 +08:00
|
|
|
node.loader = newSegmentLoader(
|
2022-05-31 13:42:03 +08:00
|
|
|
node.metaReplica,
|
2021-11-21 07:33:14 +08:00
|
|
|
node.etcdKV,
|
2022-04-07 22:05:32 +08:00
|
|
|
node.vectorStorage,
|
2022-12-02 00:33:21 +08:00
|
|
|
node.factory)
|
2021-11-12 18:27:10 +08:00
|
|
|
|
2023-02-13 16:38:33 +08:00
|
|
|
node.dataSyncService = newDataSyncService(node.queryNodeLoopCtx, node.metaReplica, node.tSafeReplica, node.dispClient, node.factory)
|
2021-09-24 21:03:56 +08:00
|
|
|
|
|
|
|
node.InitSegcore()
|
|
|
|
|
2022-12-07 18:01:19 +08:00
|
|
|
if Params.QueryNodeCfg.GCHelperEnabled.GetAsBool() {
|
2022-11-04 22:25:01 +08:00
|
|
|
action := func(GOGC uint32) {
|
|
|
|
debug.SetGCPercent(int(GOGC))
|
|
|
|
}
|
2022-12-07 18:01:19 +08:00
|
|
|
gc.NewTuner(Params.QueryNodeCfg.OverloadedMemoryThresholdPercentage.GetAsFloat(), uint32(Params.QueryNodeCfg.MinimumGOGCConfig.GetAsInt()), uint32(Params.QueryNodeCfg.MaximumGOGCConfig.GetAsInt()), action)
|
2022-11-04 22:25:01 +08:00
|
|
|
} else {
|
|
|
|
action := func(uint32) {}
|
2022-12-07 18:01:19 +08:00
|
|
|
gc.NewTuner(Params.QueryNodeCfg.OverloadedMemoryThresholdPercentage.GetAsFloat(), uint32(Params.QueryNodeCfg.MinimumGOGCConfig.GetAsInt()), uint32(Params.QueryNodeCfg.MaximumGOGCConfig.GetAsInt()), action)
|
2022-11-04 22:25:01 +08:00
|
|
|
}
|
|
|
|
|
2022-05-07 10:27:51 +08:00
|
|
|
log.Info("query node init successfully",
|
2022-11-04 14:25:38 +08:00
|
|
|
zap.Int64("queryNodeID", paramtable.GetNodeID()),
|
|
|
|
zap.String("Address", node.address),
|
2021-11-12 18:27:10 +08:00
|
|
|
)
|
2021-09-24 21:03:56 +08:00
|
|
|
})
|
2021-01-26 13:41:41 +08:00
|
|
|
|
2021-09-24 21:03:56 +08:00
|
|
|
return initError
|
2021-01-30 16:02:10 +08:00
|
|
|
}
|
|
|
|
|
2021-11-03 23:48:17 +08:00
|
|
|
// Start mainly start QueryNode's query service.
|
2021-01-30 16:02:10 +08:00
|
|
|
func (node *QueryNode) Start() error {
|
2021-04-12 09:18:43 +08:00
|
|
|
// start task scheduler
|
|
|
|
go node.scheduler.Start()
|
|
|
|
|
2022-04-20 16:15:41 +08:00
|
|
|
// create shardClusterService for shardLeader functions.
|
|
|
|
node.ShardClusterService = newShardClusterService(node.etcdCli, node.session, node)
|
|
|
|
// create shard-level query service
|
2022-09-23 14:40:51 +08:00
|
|
|
queryShardService, err := newQueryShardService(node.queryNodeLoopCtx, node.metaReplica, node.tSafeReplica,
|
2022-05-23 16:41:58 +08:00
|
|
|
node.ShardClusterService, node.factory, node.scheduler)
|
2022-09-23 14:40:51 +08:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
node.queryShardService = queryShardService
|
2022-04-20 16:15:41 +08:00
|
|
|
|
2022-10-10 15:55:22 +08:00
|
|
|
node.UpdateStateCode(commonpb.StateCode_Healthy)
|
2022-05-07 10:27:51 +08:00
|
|
|
log.Info("query node start successfully",
|
2022-11-04 14:25:38 +08:00
|
|
|
zap.Int64("queryNodeID", paramtable.GetNodeID()),
|
|
|
|
zap.String("Address", node.address),
|
2021-11-12 18:27:10 +08:00
|
|
|
)
|
2021-01-21 10:01:29 +08:00
|
|
|
return nil
|
2020-11-05 10:52:50 +08:00
|
|
|
}
|
2020-09-15 15:53:10 +08:00
|
|
|
|
2021-11-03 23:48:17 +08:00
|
|
|
// Stop mainly stop QueryNode's query service, historical loop and streaming loop.
|
2021-01-21 10:01:29 +08:00
|
|
|
func (node *QueryNode) Stop() error {
|
2022-12-14 18:21:23 +08:00
|
|
|
node.stopOnce.Do(func() {
|
|
|
|
log.Warn("Query node stop..")
|
2022-12-16 10:03:27 +08:00
|
|
|
node.UpdateStateCode(commonpb.StateCode_Stopping)
|
2022-12-14 18:21:23 +08:00
|
|
|
err := node.session.GoingStop()
|
|
|
|
if err != nil {
|
|
|
|
log.Warn("session fail to go stopping state", zap.Error(err))
|
|
|
|
} else {
|
|
|
|
noSegmentChan := node.metaReplica.getNoSegmentChan()
|
|
|
|
select {
|
|
|
|
case <-noSegmentChan:
|
|
|
|
case <-time.After(Params.QueryNodeCfg.GracefulStopTimeout.GetAsDuration(time.Second)):
|
|
|
|
log.Warn("migrate data timed out", zap.Int64("server_id", paramtable.GetNodeID()),
|
|
|
|
zap.Int64s("sealed_segment", lo.Map(node.metaReplica.getSealedSegments(), func(t *Segment, i int) int64 {
|
|
|
|
return t.ID()
|
|
|
|
})),
|
|
|
|
zap.Int64s("growing_segment", lo.Map(node.metaReplica.getGrowingSegments(), func(t *Segment, i int) int64 {
|
|
|
|
return t.ID()
|
|
|
|
})),
|
|
|
|
)
|
|
|
|
}
|
2022-12-06 22:59:19 +08:00
|
|
|
}
|
|
|
|
|
2022-12-14 18:21:23 +08:00
|
|
|
node.UpdateStateCode(commonpb.StateCode_Abnormal)
|
2023-01-29 17:45:49 +08:00
|
|
|
node.lifetime.Wait()
|
2022-12-14 18:21:23 +08:00
|
|
|
node.queryNodeLoopCancel()
|
2020-12-08 14:41:04 +08:00
|
|
|
|
2022-12-14 18:21:23 +08:00
|
|
|
// close services
|
|
|
|
if node.dataSyncService != nil {
|
|
|
|
node.dataSyncService.close()
|
|
|
|
}
|
2022-05-19 14:53:56 +08:00
|
|
|
|
2022-12-14 18:21:23 +08:00
|
|
|
if node.metaReplica != nil {
|
|
|
|
node.metaReplica.freeAll()
|
|
|
|
}
|
2022-04-20 16:15:41 +08:00
|
|
|
|
2022-12-14 18:21:23 +08:00
|
|
|
if node.ShardClusterService != nil {
|
|
|
|
node.ShardClusterService.close()
|
|
|
|
}
|
2022-11-02 14:59:34 +08:00
|
|
|
|
2022-12-14 18:21:23 +08:00
|
|
|
if node.queryShardService != nil {
|
|
|
|
node.queryShardService.close()
|
|
|
|
}
|
|
|
|
|
|
|
|
node.session.Revoke(time.Second)
|
|
|
|
})
|
2022-05-19 14:53:56 +08:00
|
|
|
|
2021-01-21 10:01:29 +08:00
|
|
|
return nil
|
2021-01-19 11:37:16 +08:00
|
|
|
}
|
|
|
|
|
2021-10-28 23:52:42 +08:00
|
|
|
// UpdateStateCode updata the state of query node, which can be initializing, healthy, and abnormal
|
2022-10-10 15:55:22 +08:00
|
|
|
func (node *QueryNode) UpdateStateCode(code commonpb.StateCode) {
|
2023-01-29 17:45:49 +08:00
|
|
|
node.lifetime.SetState(code)
|
2021-02-23 11:40:30 +08:00
|
|
|
}
|
|
|
|
|
2021-12-29 14:35:21 +08:00
|
|
|
// SetEtcdClient assigns parameter client to its member etcdCli
|
|
|
|
func (node *QueryNode) SetEtcdClient(client *clientv3.Client) {
|
|
|
|
node.etcdCli = client
|
|
|
|
}
|
2022-11-04 14:25:38 +08:00
|
|
|
|
|
|
|
func (node *QueryNode) SetAddress(address string) {
|
|
|
|
node.address = address
|
|
|
|
}
|
2023-01-12 19:49:40 +08:00
|
|
|
|
|
|
|
func (node *QueryNode) GetAddress() string {
|
|
|
|
return node.address
|
|
|
|
}
|