2021-04-19 11:35:38 +08:00
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software distributed under the License
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations under the License.
2021-06-30 10:20:15 +08:00
2021-06-22 10:42:07 +08:00
package datacoord
2021-01-22 19:43:27 +08:00
import (
2021-06-08 19:25:37 +08:00
"fmt"
2021-01-22 19:43:27 +08:00
"sync"
2021-07-12 11:03:52 +08:00
"time"
2021-01-22 19:43:27 +08:00
2021-07-12 11:03:52 +08:00
"github.com/golang/protobuf/proto"
grpcdatanodeclient "github.com/milvus-io/milvus/internal/distributed/datanode/client"
"github.com/milvus-io/milvus/internal/kv"
2021-04-22 14:45:57 +08:00
"github.com/milvus-io/milvus/internal/log"
2021-07-12 11:03:52 +08:00
"github.com/milvus-io/milvus/internal/metrics"
2021-04-22 14:45:57 +08:00
"github.com/milvus-io/milvus/internal/proto/commonpb"
"github.com/milvus-io/milvus/internal/proto/datapb"
2021-07-12 11:03:52 +08:00
"github.com/milvus-io/milvus/internal/types"
2021-05-26 19:06:56 +08:00
"go.uber.org/zap"
"golang.org/x/net/context"
2021-01-22 19:43:27 +08:00
)
2021-07-12 11:03:52 +08:00
// Keys and sizes shared by the cluster implementation.
const (
	// clusterPrefix is the kv key prefix under which each node's info is saved.
	clusterPrefix = "cluster-prefix/"
	// clusterBuffer is the kv key under which the buffered channels are saved.
	clusterBuffer = "cluster-buffer"
	// nodeEventChBufferSize is the capacity of the cluster event channel.
	nodeEventChBufferSize = 1024
)
2021-05-26 19:06:56 +08:00
2021-07-12 11:03:52 +08:00
// eventTimeout bounds every RPC sent to a data node while handling a node event.
const eventTimeout time.Duration = 5 * time.Second
// EventType identifies the kind of a cluster-level Event.
type EventType int

// Cluster event kinds. Values start at 1, so the zero value matches none of them.
const (
	// Register asks the cluster to add a node; Event.Data is a *NodeInfo.
	Register EventType = iota + 1
	// UnRegister asks the cluster to remove a node; Event.Data is a *NodeInfo.
	UnRegister
	// WatchChannel asks the cluster to assign a channel; Event.Data is a *WatchChannelParams.
	WatchChannel
	// FlushSegments asks the cluster to flush segments; Event.Data is a []*datapb.SegmentInfo.
	FlushSegments
)
2021-05-26 19:06:56 +08:00
2021-07-12 11:03:52 +08:00
// NodeEventType identifies the kind of an event delivered through a single
// node's event channel.
type NodeEventType int

// Node event kinds.
const (
	// Watch carries a *datapb.WatchDmChannelsRequest for the node.
	Watch NodeEventType = iota
	// Flush carries a *datapb.FlushSegmentsRequest for the node.
	Flush
)
type Event struct {
Type EventType
Data interface { }
2021-03-05 20:41:34 +08:00
}
2021-01-22 19:43:27 +08:00
2021-07-12 11:03:52 +08:00
type WatchChannelParams struct {
Channel string
CollectionID UniqueID
2021-02-23 09:58:06 +08:00
}
2021-07-12 11:03:52 +08:00
type Cluster struct {
ctx context . Context
cancel context . CancelFunc
mu sync . Mutex
wg sync . WaitGroup
nodes ClusterStore
posProvider positionProvider
chanBuffer [ ] * datapb . ChannelStatus //Unwatched channels buffer
kv kv . TxnKV
registerPolicy dataNodeRegisterPolicy
unregisterPolicy dataNodeUnregisterPolicy
assignPolicy channelAssignPolicy
eventCh chan * Event
2021-01-22 19:43:27 +08:00
}
2021-07-12 11:03:52 +08:00
// ClusterOption customizes a Cluster while it is built by NewCluster.
type ClusterOption func(*Cluster)
func withRegisterPolicy ( p dataNodeRegisterPolicy ) ClusterOption {
return func ( c * Cluster ) { c . registerPolicy = p }
2021-01-22 19:43:27 +08:00
}
2021-07-12 11:03:52 +08:00
func withUnregistorPolicy ( p dataNodeUnregisterPolicy ) ClusterOption {
return func ( c * Cluster ) { c . unregisterPolicy = p }
2021-01-22 19:43:27 +08:00
}
2021-07-12 11:03:52 +08:00
func withAssignPolicy ( p channelAssignPolicy ) ClusterOption {
return func ( c * Cluster ) { c . assignPolicy = p }
2021-01-22 19:43:27 +08:00
}
2021-05-26 19:06:56 +08:00
func defaultRegisterPolicy ( ) dataNodeRegisterPolicy {
2021-08-20 17:50:12 +08:00
return newAssignBufferRegisterPolicy ( )
2021-05-26 19:06:56 +08:00
}
func defaultUnregisterPolicy ( ) dataNodeUnregisterPolicy {
2021-06-23 14:28:08 +08:00
return randomAssignRegisterFunc
2021-05-26 19:06:56 +08:00
}
func defaultAssignPolicy ( ) channelAssignPolicy {
2021-06-08 19:25:37 +08:00
return newBalancedAssignPolicy ( )
2021-05-26 19:06:56 +08:00
}
2021-07-12 11:03:52 +08:00
func NewCluster ( ctx context . Context , kv kv . TxnKV , store ClusterStore ,
posProvider positionProvider , opts ... ClusterOption ) ( * Cluster , error ) {
ctx , cancel := context . WithCancel ( ctx )
c := & Cluster {
2021-05-26 19:06:56 +08:00
ctx : ctx ,
2021-07-12 11:03:52 +08:00
cancel : cancel ,
kv : kv ,
nodes : store ,
2021-05-27 14:14:05 +08:00
posProvider : posProvider ,
2021-07-12 11:03:52 +08:00
chanBuffer : [ ] * datapb . ChannelStatus { } ,
2021-05-26 19:06:56 +08:00
registerPolicy : defaultRegisterPolicy ( ) ,
unregisterPolicy : defaultUnregisterPolicy ( ) ,
2021-06-08 19:25:37 +08:00
assignPolicy : defaultAssignPolicy ( ) ,
2021-07-12 11:03:52 +08:00
eventCh : make ( chan * Event , nodeEventChBufferSize ) ,
2021-02-02 18:53:10 +08:00
}
2021-06-30 10:20:15 +08:00
2021-05-26 19:06:56 +08:00
for _ , opt := range opts {
2021-07-12 11:03:52 +08:00
opt ( c )
2021-02-02 18:53:10 +08:00
}
2021-05-26 19:06:56 +08:00
2021-07-12 11:03:52 +08:00
if err := c . loadFromKv ( ) ; err != nil {
return nil , err
}
return c , nil
2021-05-26 19:06:56 +08:00
}
2021-07-12 11:03:52 +08:00
func ( c * Cluster ) loadFromKv ( ) error {
_ , values , err := c . kv . LoadWithPrefix ( clusterPrefix )
2021-06-28 13:28:14 +08:00
if err != nil {
2021-07-12 11:03:52 +08:00
return err
2021-06-28 13:28:14 +08:00
}
2021-05-26 19:06:56 +08:00
2021-07-12 11:03:52 +08:00
for _ , v := range values {
info := & datapb . DataNodeInfo { }
if err := proto . UnmarshalText ( v , info ) ; err != nil {
return err
2021-06-30 10:20:15 +08:00
}
2021-07-12 11:03:52 +08:00
node := NewNodeInfo ( c . ctx , info )
c . nodes . SetNode ( info . GetVersion ( ) , node )
go c . handleEvent ( node )
2021-06-30 10:20:15 +08:00
}
2021-07-12 11:03:52 +08:00
dn , _ := c . kv . Load ( clusterBuffer )
//TODO add not value error check
if dn != "" {
info := & datapb . DataNodeInfo { }
if err := proto . UnmarshalText ( dn , info ) ; err != nil {
return err
2021-07-02 16:48:30 +08:00
}
2021-07-12 11:03:52 +08:00
c . chanBuffer = info . Channels
2021-07-01 14:32:16 +08:00
}
2021-06-30 10:20:15 +08:00
2021-07-12 11:03:52 +08:00
return nil
}
func ( c * Cluster ) Flush ( segments [ ] * datapb . SegmentInfo ) {
c . eventCh <- & Event {
Type : FlushSegments ,
Data : segments ,
2021-06-30 10:20:15 +08:00
}
2021-07-12 11:03:52 +08:00
}
func ( c * Cluster ) Register ( node * NodeInfo ) {
c . eventCh <- & Event {
Type : Register ,
Data : node ,
2021-06-30 10:20:15 +08:00
}
2021-07-12 11:03:52 +08:00
}
2021-06-30 10:20:15 +08:00
2021-07-12 11:03:52 +08:00
func ( c * Cluster ) UnRegister ( node * NodeInfo ) {
c . eventCh <- & Event {
Type : UnRegister ,
Data : node ,
}
2021-06-30 10:20:15 +08:00
}
2021-07-12 11:03:52 +08:00
func ( c * Cluster ) Watch ( channel string , collectionID UniqueID ) {
c . eventCh <- & Event {
Type : WatchChannel ,
Data : & WatchChannelParams {
Channel : channel ,
CollectionID : collectionID ,
} ,
2021-06-28 13:28:14 +08:00
}
}
2021-07-12 11:03:52 +08:00
func ( c * Cluster ) handleNodeEvent ( ) {
defer c . wg . Done ( )
for {
select {
case <- c . ctx . Done ( ) :
return
case e := <- c . eventCh :
switch e . Type {
case Register :
c . handleRegister ( e . Data . ( * NodeInfo ) )
case UnRegister :
c . handleUnRegister ( e . Data . ( * NodeInfo ) )
case WatchChannel :
params := e . Data . ( * WatchChannelParams )
c . handleWatchChannel ( params . Channel , params . CollectionID )
case FlushSegments :
c . handleFlush ( e . Data . ( [ ] * datapb . SegmentInfo ) )
default :
log . Warn ( "Unknow node event type" )
}
}
2021-06-29 10:46:13 +08:00
}
2021-07-12 11:03:52 +08:00
}
2021-06-28 13:28:14 +08:00
2021-07-12 11:03:52 +08:00
func ( c * Cluster ) handleEvent ( node * NodeInfo ) {
2021-07-15 16:38:31 +08:00
log . Debug ( "start handle event" , zap . Any ( "node" , node ) )
2021-07-12 11:03:52 +08:00
ctx := node . ctx
ch := node . GetEventChannel ( )
2021-07-15 16:38:31 +08:00
version := node . Info . GetVersion ( )
2021-07-12 11:03:52 +08:00
for {
select {
case <- ctx . Done ( ) :
return
case event := <- ch :
2021-07-15 16:38:31 +08:00
cli , err := c . getOrCreateClient ( ctx , version )
if err != nil {
log . Warn ( "failed to get client" , zap . Int64 ( "nodeID" , version ) , zap . Error ( err ) )
continue
2021-06-28 13:28:14 +08:00
}
2021-07-12 11:03:52 +08:00
switch event . Type {
case Watch :
req , ok := event . Req . ( * datapb . WatchDmChannelsRequest )
if ! ok {
log . Warn ( "request type is not Watch" )
continue
}
2021-07-15 16:38:31 +08:00
log . Debug ( "receive watch event" , zap . Any ( "event" , event ) , zap . Any ( "node" , node ) )
2021-07-12 11:03:52 +08:00
tCtx , cancel := context . WithTimeout ( ctx , eventTimeout )
resp , err := cli . WatchDmChannels ( tCtx , req )
cancel ( )
if err = VerifyResponse ( resp , err ) ; err != nil {
2021-07-28 11:43:22 +08:00
log . Warn ( "failed to watch dm channels" , zap . String ( "addr" , node . Info . GetAddress ( ) ) )
2021-07-12 11:03:52 +08:00
}
c . mu . Lock ( )
2021-07-15 16:38:31 +08:00
c . nodes . SetWatched ( node . Info . GetVersion ( ) , parseChannelsFromReq ( req ) )
2021-07-12 11:03:52 +08:00
c . mu . Unlock ( )
if err = c . saveNode ( node ) ; err != nil {
log . Warn ( "failed to save node info" , zap . Any ( "node" , node ) )
continue
}
case Flush :
req , ok := event . Req . ( * datapb . FlushSegmentsRequest )
if ! ok {
log . Warn ( "request type is not Flush" )
continue
}
tCtx , cancel := context . WithTimeout ( ctx , eventTimeout )
resp , err := cli . FlushSegments ( tCtx , req )
cancel ( )
if err = VerifyResponse ( resp , err ) ; err != nil {
2021-08-20 17:50:12 +08:00
log . Warn ( "failed to flush segments" , zap . String ( "addr" , node . Info . GetAddress ( ) ) , zap . Error ( err ) )
2021-07-12 11:03:52 +08:00
}
default :
2021-07-15 16:38:31 +08:00
log . Warn ( "unknown event type" , zap . Any ( "type" , event . Type ) )
2021-07-12 11:03:52 +08:00
}
}
2021-06-28 13:28:14 +08:00
}
2021-07-12 11:03:52 +08:00
}
2021-07-15 16:38:31 +08:00
// getOrCreateClient returns the client of the node stored under id. If the
// node has no client yet, one is created for the node's address and recorded
// in the node store. An error is returned when no node is stored under id or
// when the client cannot be created.
func (c *Cluster) getOrCreateClient(ctx context.Context, id UniqueID) (types.DataNode, error) {
	c.mu.Lock()
	node := c.nodes.GetNode(id)
	c.mu.Unlock()
	if node == nil {
		return nil, fmt.Errorf("node %d is not alive", id)
	}

	if existing := node.GetClient(); existing != nil {
		return existing, nil
	}

	// The client is created without holding the lock.
	created, err := createClient(ctx, node.Info.GetAddress())
	if err != nil {
		return nil, err
	}

	c.mu.Lock()
	defer c.mu.Unlock()
	c.nodes.SetClient(node.Info.GetVersion(), created)
	return created, nil
}
2021-07-12 11:03:52 +08:00
func parseChannelsFromReq ( req * datapb . WatchDmChannelsRequest ) [ ] string {
channels := make ( [ ] string , 0 , len ( req . GetVchannels ( ) ) )
for _ , vc := range req . GetVchannels ( ) {
channels = append ( channels , vc . ChannelName )
2021-06-28 13:28:14 +08:00
}
2021-07-12 11:03:52 +08:00
return channels
2021-06-28 13:28:14 +08:00
}
2021-07-12 11:03:52 +08:00
func createClient ( ctx context . Context , addr string ) ( types . DataNode , error ) {
cli , err := grpcdatanodeclient . NewClient ( ctx , addr )
2021-06-30 10:20:15 +08:00
if err != nil {
2021-07-12 11:03:52 +08:00
return nil , err
2021-06-30 10:20:15 +08:00
}
2021-07-12 11:03:52 +08:00
if err := cli . Init ( ) ; err != nil {
return nil , err
}
if err := cli . Start ( ) ; err != nil {
return nil , err
}
return cli , nil
2021-06-30 10:20:15 +08:00
}
2021-07-12 11:03:52 +08:00
// Startup applies statup policy
func ( c * Cluster ) Startup ( nodes [ ] * NodeInfo ) {
c . wg . Add ( 1 )
go c . handleNodeEvent ( )
// before startup, we have restore all nodes recorded last time. We should
// find new created/offlined/restarted nodes and adjust channels allocation.
addNodes , deleteNodes := c . updateCluster ( nodes )
for _ , node := range addNodes {
c . Register ( node )
}
for _ , node := range deleteNodes {
c . UnRegister ( node )
}
2021-06-30 10:20:15 +08:00
}
2021-07-12 11:03:52 +08:00
func ( c * Cluster ) updateCluster ( nodes [ ] * NodeInfo ) ( newNodes [ ] * NodeInfo , offlines [ ] * NodeInfo ) {
var onCnt , offCnt float64
currentOnline := make ( map [ int64 ] struct { } )
2021-05-26 19:06:56 +08:00
for _ , n := range nodes {
2021-07-15 16:38:31 +08:00
currentOnline [ n . Info . GetVersion ( ) ] = struct { } { }
node := c . nodes . GetNode ( n . Info . GetVersion ( ) )
2021-07-12 11:03:52 +08:00
if node == nil {
newNodes = append ( newNodes , n )
}
onCnt ++
2021-01-22 19:43:27 +08:00
}
2021-07-12 11:03:52 +08:00
currNodes := c . nodes . GetNodes ( )
for _ , node := range currNodes {
2021-07-15 16:38:31 +08:00
_ , has := currentOnline [ node . Info . GetVersion ( ) ]
2021-07-12 11:03:52 +08:00
if ! has {
offlines = append ( offlines , node )
offCnt ++
}
2021-06-29 10:46:13 +08:00
}
2021-07-12 11:03:52 +08:00
metrics . DataCoordDataNodeList . WithLabelValues ( "online" ) . Set ( onCnt )
metrics . DataCoordDataNodeList . WithLabelValues ( "offline" ) . Set ( offCnt )
return
2021-01-22 19:43:27 +08:00
}
2021-07-12 11:03:52 +08:00
func ( c * Cluster ) handleRegister ( n * NodeInfo ) {
2021-05-26 19:06:56 +08:00
c . mu . Lock ( )
2021-07-12 11:03:52 +08:00
cNodes := c . nodes . GetNodes ( )
var nodes [ ] * NodeInfo
2021-07-28 11:43:22 +08:00
log . Debug ( "channels info before register policy applied" ,
zap . Any ( "n.Channels" , n . Info . GetChannels ( ) ) ,
zap . Any ( "buffer" , c . chanBuffer ) )
2021-07-12 11:03:52 +08:00
nodes , c . chanBuffer = c . registerPolicy ( cNodes , n , c . chanBuffer )
2021-07-28 11:43:22 +08:00
log . Debug ( "delta changes after register policy applied" ,
zap . Any ( "nodes" , nodes ) ,
zap . Any ( "buffer" , c . chanBuffer ) )
2021-07-12 11:03:52 +08:00
go c . handleEvent ( n )
c . txnSaveNodesAndBuffer ( nodes , c . chanBuffer )
for _ , node := range nodes {
2021-07-15 16:38:31 +08:00
c . nodes . SetNode ( node . Info . GetVersion ( ) , node )
2021-07-12 11:03:52 +08:00
}
c . mu . Unlock ( )
for _ , node := range nodes {
c . watch ( node )
2021-06-28 13:28:14 +08:00
}
2021-05-26 19:06:56 +08:00
}
2021-07-12 11:03:52 +08:00
func ( c * Cluster ) handleUnRegister ( n * NodeInfo ) {
2021-05-26 19:06:56 +08:00
c . mu . Lock ( )
2021-07-15 16:38:31 +08:00
node := c . nodes . GetNode ( n . Info . GetVersion ( ) )
2021-07-12 11:03:52 +08:00
if node == nil {
c . mu . Unlock ( )
return
2021-06-09 19:32:48 +08:00
}
2021-07-12 11:03:52 +08:00
node . Dispose ( )
2021-07-15 16:38:31 +08:00
// save deleted node to kv
deleted := node . Clone ( SetChannels ( nil ) )
c . saveNode ( deleted )
c . nodes . DeleteNode ( n . Info . GetVersion ( ) )
2021-07-12 11:03:52 +08:00
cNodes := c . nodes . GetNodes ( )
2021-07-28 11:43:22 +08:00
log . Debug ( "channels info before unregister policy applied" , zap . Any ( "node.Channels" , node . Info . GetChannels ( ) ) , zap . Any ( "buffer" , c . chanBuffer ) , zap . Any ( "nodes" , cNodes ) )
2021-07-12 11:03:52 +08:00
var rets [ ] * NodeInfo
2021-06-09 18:43:50 +08:00
if len ( cNodes ) == 0 {
2021-07-15 16:38:31 +08:00
for _ , chStat := range node . Info . GetChannels ( ) {
2021-06-09 18:43:50 +08:00
chStat . State = datapb . ChannelWatchState_Uncomplete
2021-07-12 11:03:52 +08:00
c . chanBuffer = append ( c . chanBuffer , chStat )
2021-06-09 18:43:50 +08:00
}
} else {
2021-07-15 16:38:31 +08:00
rets = c . unregisterPolicy ( cNodes , node )
2021-06-09 18:43:50 +08:00
}
2021-07-28 11:43:22 +08:00
log . Debug ( "delta changes after unregister policy" , zap . Any ( "nodes" , rets ) , zap . Any ( "buffer" , c . chanBuffer ) )
2021-07-12 11:03:52 +08:00
c . txnSaveNodesAndBuffer ( rets , c . chanBuffer )
for _ , node := range rets {
2021-07-15 16:38:31 +08:00
c . nodes . SetNode ( node . Info . GetVersion ( ) , node )
2021-07-12 11:03:52 +08:00
}
c . mu . Unlock ( )
for _ , node := range rets {
c . watch ( node )
2021-06-28 13:28:14 +08:00
}
2021-05-26 19:06:56 +08:00
}
2021-07-12 11:03:52 +08:00
func ( c * Cluster ) handleWatchChannel ( channel string , collectionID UniqueID ) {
2021-05-26 19:06:56 +08:00
c . mu . Lock ( )
2021-07-12 11:03:52 +08:00
cNodes := c . nodes . GetNodes ( )
var rets [ ] * NodeInfo
2021-06-09 18:43:50 +08:00
if len ( cNodes ) == 0 { // no nodes to assign, put into buffer
2021-07-12 11:03:52 +08:00
c . chanBuffer = append ( c . chanBuffer , & datapb . ChannelStatus {
2021-06-09 18:43:50 +08:00
Name : channel ,
CollectionID : collectionID ,
State : datapb . ChannelWatchState_Uncomplete ,
} )
} else {
2021-07-12 11:03:52 +08:00
rets = c . assignPolicy ( cNodes , channel , collectionID )
2021-06-09 18:43:50 +08:00
}
2021-07-12 11:03:52 +08:00
c . txnSaveNodesAndBuffer ( rets , c . chanBuffer )
for _ , node := range rets {
2021-07-15 16:38:31 +08:00
c . nodes . SetNode ( node . Info . GetVersion ( ) , node )
2021-07-12 11:03:52 +08:00
}
c . mu . Unlock ( )
for _ , node := range rets {
c . watch ( node )
2021-06-28 13:28:14 +08:00
}
2021-05-26 19:06:56 +08:00
}
2021-07-12 11:03:52 +08:00
func ( c * Cluster ) handleFlush ( segments [ ] * datapb . SegmentInfo ) {
2021-05-26 19:06:56 +08:00
m := make ( map [ string ] map [ UniqueID ] [ ] UniqueID ) // channel-> map[collectionID]segmentIDs
for _ , seg := range segments {
if _ , ok := m [ seg . InsertChannel ] ; ! ok {
m [ seg . InsertChannel ] = make ( map [ UniqueID ] [ ] UniqueID )
2021-01-22 19:43:27 +08:00
}
2021-05-26 19:06:56 +08:00
m [ seg . InsertChannel ] [ seg . CollectionID ] = append ( m [ seg . InsertChannel ] [ seg . CollectionID ] , seg . ID )
2021-01-22 19:43:27 +08:00
}
2021-07-12 11:03:52 +08:00
c . mu . Lock ( )
dataNodes := c . nodes . GetNodes ( )
c . mu . Unlock ( )
2021-05-26 19:06:56 +08:00
2021-07-12 11:03:52 +08:00
channel2Node := make ( map [ string ] * NodeInfo )
2021-05-26 19:06:56 +08:00
for _ , node := range dataNodes {
2021-07-15 16:38:31 +08:00
for _ , chstatus := range node . Info . GetChannels ( ) {
2021-07-12 11:03:52 +08:00
channel2Node [ chstatus . Name ] = node
2021-01-22 19:43:27 +08:00
}
}
2021-01-28 11:24:41 +08:00
2021-05-26 19:06:56 +08:00
for ch , coll2seg := range m {
node , ok := channel2Node [ ch ]
if ! ok {
continue
}
for coll , segs := range coll2seg {
req := & datapb . FlushSegmentsRequest {
Base : & commonpb . MsgBase {
MsgType : commonpb . MsgType_Flush ,
SourceID : Params . NodeID ,
} ,
CollectionID : coll ,
SegmentIDs : segs ,
}
2021-07-12 11:03:52 +08:00
ch := node . GetEventChannel ( )
e := & NodeEvent {
Type : Flush ,
Req : req ,
2021-05-26 19:06:56 +08:00
}
2021-07-12 11:03:52 +08:00
ch <- e
}
}
}
func ( c * Cluster ) watch ( n * NodeInfo ) {
2021-07-15 16:38:31 +08:00
channelNames := make ( [ ] string , 0 )
uncompletes := make ( [ ] vchannel , 0 , len ( n . Info . Channels ) )
for _ , ch := range n . Info . GetChannels ( ) {
2021-07-12 11:03:52 +08:00
if ch . State == datapb . ChannelWatchState_Uncomplete {
2021-07-15 16:38:31 +08:00
channelNames = append ( channelNames , ch . GetName ( ) )
2021-07-12 11:03:52 +08:00
uncompletes = append ( uncompletes , vchannel {
CollectionID : ch . CollectionID ,
DmlChannel : ch . Name ,
} )
2021-05-26 19:06:56 +08:00
}
2021-01-28 11:24:41 +08:00
}
2021-07-12 11:03:52 +08:00
if len ( uncompletes ) == 0 {
return // all set, just return
}
2021-07-28 11:43:22 +08:00
log . Debug ( "plan to watch channel" ,
zap . String ( "node" , n . Info . GetAddress ( ) ) ,
zap . Int64 ( "version" , n . Info . GetVersion ( ) ) ,
zap . Strings ( "channels" , channelNames ) )
2021-07-12 11:03:52 +08:00
vchanInfos , err := c . posProvider . GetVChanPositions ( uncompletes , true )
if err != nil {
log . Warn ( "get vchannel position failed" , zap . Error ( err ) )
return
}
req := & datapb . WatchDmChannelsRequest {
Base : & commonpb . MsgBase {
SourceID : Params . NodeID ,
} ,
Vchannels : vchanInfos ,
}
e := & NodeEvent {
Type : Watch ,
Req : req ,
}
ch := n . GetEventChannel ( )
2021-07-28 11:43:22 +08:00
log . Debug ( "put watch event to node channel" ,
zap . Any ( "event" , e ) ,
zap . Any ( "node.version" , n . Info . GetVersion ( ) ) ,
zap . String ( "node.address" , n . Info . GetAddress ( ) ) )
2021-07-12 11:03:52 +08:00
ch <- e
}
func ( c * Cluster ) saveNode ( n * NodeInfo ) error {
2021-07-15 16:38:31 +08:00
key := fmt . Sprintf ( "%s%d" , clusterPrefix , n . Info . GetVersion ( ) )
value := proto . MarshalTextString ( n . Info )
2021-07-12 11:03:52 +08:00
return c . kv . Save ( key , value )
}
func ( c * Cluster ) txnSaveNodesAndBuffer ( nodes [ ] * NodeInfo , buffer [ ] * datapb . ChannelStatus ) error {
if len ( nodes ) == 0 && len ( buffer ) == 0 {
return nil
}
data := make ( map [ string ] string )
for _ , n := range nodes {
2021-07-15 16:38:31 +08:00
key := fmt . Sprintf ( "%s%d" , clusterPrefix , n . Info . GetVersion ( ) )
value := proto . MarshalTextString ( n . Info )
2021-07-12 11:03:52 +08:00
data [ key ] = value
}
// short cut, reusing datainfo to store array of channel status
bufNode := & datapb . DataNodeInfo {
Channels : buffer ,
}
data [ clusterBuffer ] = proto . MarshalTextString ( bufNode )
return c . kv . MultiSave ( data )
}
func ( c * Cluster ) GetNodes ( ) [ ] * NodeInfo {
c . mu . Lock ( )
defer c . mu . Unlock ( )
return c . nodes . GetNodes ( )
2021-01-28 11:24:41 +08:00
}
2021-02-02 18:53:10 +08:00
2021-07-12 11:03:52 +08:00
func ( c * Cluster ) Close ( ) {
c . cancel ( )
c . wg . Wait ( )
2021-05-26 19:06:56 +08:00
c . mu . Lock ( )
defer c . mu . Unlock ( )
2021-07-12 11:03:52 +08:00
nodes := c . nodes . GetNodes ( )
for _ , node := range nodes {
node . Dispose ( )
}
2021-02-02 18:53:10 +08:00
}