// Licensed to the LF AI & Data foundation under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package datacoord import ( "context" "errors" "testing" "time" "github.com/golang/protobuf/proto" "github.com/milvus-io/milvus/internal/kv" etcdkv "github.com/milvus-io/milvus/internal/kv/etcd" "github.com/milvus-io/milvus/internal/proto/datapb" "github.com/milvus-io/milvus/internal/types" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "stathat.com/c/consistent" ) func getMetaKv(t *testing.T) kv.MetaKv { rootPath := "/etcd/test/root/" + t.Name() metakv, err := etcdkv.NewMetaKvFactory(rootPath, &Params.EtcdCfg) require.NoError(t, err) return metakv } func TestClusterCreate(t *testing.T) { kv := getMetaKv(t) defer func() { kv.RemoveWithPrefix("") kv.Close() }() t.Run("startup normally", func(t *testing.T) { ctx, cancel := context.WithCancel(context.TODO()) defer cancel() sessionManager := NewSessionManager() channelManager, err := NewChannelManager(kv, newMockHandler()) assert.Nil(t, err) cluster := NewCluster(sessionManager, channelManager) defer cluster.Close() addr := "localhost:8080" info := &NodeInfo{ NodeID: 1, Address: addr, } nodes := []*NodeInfo{info} err = cluster.Startup(ctx, nodes) assert.Nil(t, err) dataNodes := sessionManager.GetSessions() assert.EqualValues(t, 1, len(dataNodes)) assert.EqualValues(t, "localhost:8080", dataNodes[0].info.Address) }) t.Run("startup with existed channel data", func(t *testing.T) { defer kv.RemoveWithPrefix("") ctx, cancel := context.WithCancel(context.TODO()) defer cancel() var err error info1 := &datapb.ChannelWatchInfo{ Vchan: &datapb.VchannelInfo{ CollectionID: 1, ChannelName: "channel1", }, } info1Data, err := proto.Marshal(info1) assert.Nil(t, err) err = kv.Save(Params.DataCoordCfg.ChannelWatchSubPath+"/1/channel1", string(info1Data)) assert.Nil(t, err) sessionManager := NewSessionManager() channelManager, err := NewChannelManager(kv, newMockHandler()) assert.Nil(t, err) cluster := NewCluster(sessionManager, channelManager) defer cluster.Close() err = cluster.Startup(ctx, []*NodeInfo{{NodeID: 1, Address: "localhost:9999"}}) assert.Nil(t, err) channels := channelManager.GetChannels() assert.EqualValues(t, []*NodeChannelInfo{{1, []*channel{{Name: "channel1", CollectionID: 1}}}}, channels) }) t.Run("remove all nodes and restart with other nodes", func(t *testing.T) { defer kv.RemoveWithPrefix("") ctx, cancel := context.WithCancel(context.TODO()) defer cancel() sessionManager := NewSessionManager() channelManager, err := NewChannelManager(kv, newMockHandler()) assert.Nil(t, err) cluster := NewCluster(sessionManager, channelManager) addr := "localhost:8080" info := &NodeInfo{ NodeID: 1, Address: addr, } nodes := []*NodeInfo{info} err = cluster.Startup(ctx, nodes) assert.Nil(t, err) err = cluster.UnRegister(info) assert.Nil(t, err) sessions := sessionManager.GetSessions() assert.Empty(t, sessions) cluster.Close() sessionManager2 := NewSessionManager() channelManager2, err := NewChannelManager(kv, newMockHandler()) assert.Nil(t, err) clusterReload := NewCluster(sessionManager2, channelManager2) defer clusterReload.Close() addr = "localhost:8081" info = &NodeInfo{ NodeID: 2, Address: addr, } nodes = []*NodeInfo{info} err = clusterReload.Startup(ctx, nodes) assert.Nil(t, err) sessions = sessionManager2.GetSessions() assert.EqualValues(t, 1, len(sessions)) assert.EqualValues(t, 2, sessions[0].info.NodeID) assert.EqualValues(t, addr, sessions[0].info.Address) channels := channelManager2.GetChannels() assert.EqualValues(t, 1, len(channels)) assert.EqualValues(t, 2, channels[0].NodeID) }) t.Run("loadKv Fails", func(t *testing.T) { defer kv.RemoveWithPrefix("") fkv := &loadPrefixFailKV{MetaKv: kv} _, err := NewChannelManager(fkv, newMockHandler()) assert.NotNil(t, err) }) } // a mock kv that always fail when LoadWithPrefix type loadPrefixFailKV struct { kv.MetaKv } // LoadWithPrefix override behavior func (kv *loadPrefixFailKV) LoadWithPrefix(key string) ([]string, []string, error) { return []string{}, []string{}, errors.New("mocked fail") } func TestRegister(t *testing.T) { kv := getMetaKv(t) defer func() { kv.RemoveWithPrefix("") kv.Close() }() t.Run("register to empty cluster", func(t *testing.T) { defer kv.RemoveWithPrefix("") ctx, cancel := context.WithCancel(context.TODO()) defer cancel() sessionManager := NewSessionManager() channelManager, err := NewChannelManager(kv, newMockHandler()) assert.Nil(t, err) cluster := NewCluster(sessionManager, channelManager) defer cluster.Close() addr := "localhost:8080" err = cluster.Startup(ctx, nil) assert.Nil(t, err) info := &NodeInfo{ NodeID: 1, Address: addr, } err = cluster.Register(info) assert.Nil(t, err) sessions := sessionManager.GetSessions() assert.EqualValues(t, 1, len(sessions)) assert.EqualValues(t, "localhost:8080", sessions[0].info.Address) }) t.Run("register to empty cluster with buffer channels", func(t *testing.T) { defer kv.RemoveWithPrefix("") ctx, cancel := context.WithCancel(context.TODO()) defer cancel() sessionManager := NewSessionManager() channelManager, err := NewChannelManager(kv, newMockHandler()) assert.Nil(t, err) err = channelManager.Watch(&channel{ Name: "ch1", CollectionID: 0, }) assert.Nil(t, err) cluster := NewCluster(sessionManager, channelManager) defer cluster.Close() addr := "localhost:8080" err = cluster.Startup(ctx, nil) assert.Nil(t, err) info := &NodeInfo{ NodeID: 1, Address: addr, } err = cluster.Register(info) assert.Nil(t, err) bufferChannels := channelManager.GetBufferChannels() assert.Empty(t, bufferChannels.Channels) nodeChannels := channelManager.GetChannels() assert.EqualValues(t, 1, len(nodeChannels)) assert.EqualValues(t, 1, nodeChannels[0].NodeID) assert.EqualValues(t, "ch1", nodeChannels[0].Channels[0].Name) }) t.Run("register and restart with no channel", func(t *testing.T) { defer kv.RemoveWithPrefix("") ctx, cancel := context.WithCancel(context.TODO()) defer cancel() sessionManager := NewSessionManager() channelManager, err := NewChannelManager(kv, newMockHandler()) assert.Nil(t, err) cluster := NewCluster(sessionManager, channelManager) addr := "localhost:8080" err = cluster.Startup(ctx, nil) assert.Nil(t, err) info := &NodeInfo{ NodeID: 1, Address: addr, } err = cluster.Register(info) assert.Nil(t, err) cluster.Close() sessionManager2 := NewSessionManager() channelManager2, err := NewChannelManager(kv, newMockHandler()) assert.Nil(t, err) restartCluster := NewCluster(sessionManager2, channelManager2) defer restartCluster.Close() channels := channelManager2.GetChannels() assert.Empty(t, channels) }) } func TestUnregister(t *testing.T) { kv := getMetaKv(t) defer func() { kv.RemoveWithPrefix("") kv.Close() }() t.Run("remove node after unregister", func(t *testing.T) { defer kv.RemoveWithPrefix("") ctx, cancel := context.WithCancel(context.TODO()) defer cancel() sessionManager := NewSessionManager() channelManager, err := NewChannelManager(kv, newMockHandler()) assert.Nil(t, err) cluster := NewCluster(sessionManager, channelManager) defer cluster.Close() addr := "localhost:8080" info := &NodeInfo{ Address: addr, NodeID: 1, } nodes := []*NodeInfo{info} err = cluster.Startup(ctx, nodes) assert.Nil(t, err) err = cluster.UnRegister(nodes[0]) assert.Nil(t, err) sessions := sessionManager.GetSessions() assert.Empty(t, sessions) }) t.Run("move channels to online nodes after unregister", func(t *testing.T) { defer kv.RemoveWithPrefix("") ctx, cancel := context.WithCancel(context.TODO()) defer cancel() sessionManager := NewSessionManager() channelManager, err := NewChannelManager(kv, newMockHandler()) assert.Nil(t, err) cluster := NewCluster(sessionManager, channelManager) defer cluster.Close() nodeInfo1 := &NodeInfo{ Address: "localhost:8080", NodeID: 1, } nodeInfo2 := &NodeInfo{ Address: "localhost:8081", NodeID: 2, } nodes := []*NodeInfo{nodeInfo1, nodeInfo2} err = cluster.Startup(ctx, nodes) assert.Nil(t, err) err = cluster.Watch("ch1", 1) assert.Nil(t, err) err = cluster.UnRegister(nodeInfo1) assert.Nil(t, err) channels := channelManager.GetChannels() assert.EqualValues(t, 1, len(channels)) assert.EqualValues(t, 2, channels[0].NodeID) assert.EqualValues(t, 1, len(channels[0].Channels)) assert.EqualValues(t, "ch1", channels[0].Channels[0].Name) }) t.Run("remove all channels after unregsiter", func(t *testing.T) { defer kv.RemoveWithPrefix("") ctx, cancel := context.WithCancel(context.TODO()) defer cancel() var mockSessionCreator = func(ctx context.Context, addr string) (types.DataNode, error) { return newMockDataNodeClient(1, nil) } sessionManager := NewSessionManager(withSessionCreator(mockSessionCreator)) channelManager, err := NewChannelManager(kv, newMockHandler()) assert.Nil(t, err) cluster := NewCluster(sessionManager, channelManager) defer cluster.Close() nodeInfo := &NodeInfo{ Address: "localhost:8080", NodeID: 1, } err = cluster.Startup(ctx, []*NodeInfo{nodeInfo}) assert.Nil(t, err) err = cluster.Watch("ch_1", 1) assert.Nil(t, err) err = cluster.UnRegister(nodeInfo) assert.Nil(t, err) channels := channelManager.GetChannels() assert.Empty(t, channels) channel := channelManager.GetBufferChannels() assert.NotNil(t, channel) assert.EqualValues(t, 1, len(channel.Channels)) assert.EqualValues(t, "ch_1", channel.Channels[0].Name) }) } func TestWatchIfNeeded(t *testing.T) { kv := getMetaKv(t) defer func() { kv.RemoveWithPrefix("") kv.Close() }() t.Run("add deplicated channel to cluster", func(t *testing.T) { defer kv.RemoveWithPrefix("") ctx, cancel := context.WithCancel(context.TODO()) defer cancel() var mockSessionCreator = func(ctx context.Context, addr string) (types.DataNode, error) { return newMockDataNodeClient(1, nil) } sessionManager := NewSessionManager(withSessionCreator(mockSessionCreator)) channelManager, err := NewChannelManager(kv, newMockHandler()) assert.Nil(t, err) cluster := NewCluster(sessionManager, channelManager) defer cluster.Close() addr := "localhost:8080" info := &NodeInfo{ Address: addr, NodeID: 1, } err = cluster.Startup(ctx, []*NodeInfo{info}) assert.Nil(t, err) err = cluster.Watch("ch1", 1) assert.Nil(t, err) channels := channelManager.GetChannels() assert.EqualValues(t, 1, len(channels)) assert.EqualValues(t, "ch1", channels[0].Channels[0].Name) }) t.Run("watch channel to empty cluster", func(t *testing.T) { defer kv.RemoveWithPrefix("") sessionManager := NewSessionManager() channelManager, err := NewChannelManager(kv, newMockHandler()) assert.Nil(t, err) cluster := NewCluster(sessionManager, channelManager) defer cluster.Close() err = cluster.Watch("ch1", 1) assert.Nil(t, err) channels := channelManager.GetChannels() assert.Empty(t, channels) channel := channelManager.GetBufferChannels() assert.NotNil(t, channel) assert.EqualValues(t, "ch1", channel.Channels[0].Name) }) } func TestConsistentHashPolicy(t *testing.T) { kv := getMetaKv(t) defer func() { kv.RemoveWithPrefix("") kv.Close() }() sessionManager := NewSessionManager() chash := consistent.New() factory := NewConsistentHashChannelPolicyFactory(chash) channelManager, err := NewChannelManager(kv, newMockHandler(), withFactory(factory)) assert.Nil(t, err) cluster := NewCluster(sessionManager, channelManager) defer cluster.Close() hash := consistent.New() hash.Add("1") hash.Add("2") hash.Add("3") nodeInfo1 := &NodeInfo{ NodeID: 1, Address: "localhost:1111", } nodeInfo2 := &NodeInfo{ NodeID: 2, Address: "localhost:2222", } nodeInfo3 := &NodeInfo{ NodeID: 3, Address: "localhost:3333", } err = cluster.Register(nodeInfo1) assert.Nil(t, err) err = cluster.Register(nodeInfo2) assert.Nil(t, err) err = cluster.Register(nodeInfo3) assert.Nil(t, err) channels := []string{"ch1", "ch2", "ch3"} for _, c := range channels { err = cluster.Watch(c, 1) assert.Nil(t, err) idstr, err := hash.Get(c) assert.Nil(t, err) id, err := deformatNodeID(idstr) assert.Nil(t, err) match := channelManager.Match(id, c) assert.True(t, match) } hash.Remove("1") err = cluster.UnRegister(nodeInfo1) assert.Nil(t, err) for _, c := range channels { idstr, err := hash.Get(c) assert.Nil(t, err) id, err := deformatNodeID(idstr) assert.Nil(t, err) match := channelManager.Match(id, c) assert.True(t, match) } hash.Remove("2") err = cluster.UnRegister(nodeInfo2) assert.Nil(t, err) for _, c := range channels { idstr, err := hash.Get(c) assert.Nil(t, err) id, err := deformatNodeID(idstr) assert.Nil(t, err) match := channelManager.Match(id, c) assert.True(t, match) } hash.Remove("3") err = cluster.UnRegister(nodeInfo3) assert.Nil(t, err) bufferChannels := channelManager.GetBufferChannels() assert.EqualValues(t, 3, len(bufferChannels.Channels)) } func TestCluster_Flush(t *testing.T) { kv := getMetaKv(t) defer func() { kv.RemoveWithPrefix("") kv.Close() }() ctx, cancel := context.WithCancel(context.TODO()) defer cancel() sessionManager := NewSessionManager() channelManager, err := NewChannelManager(kv, newMockHandler()) assert.Nil(t, err) cluster := NewCluster(sessionManager, channelManager) defer cluster.Close() addr := "localhost:8080" info := &NodeInfo{ Address: addr, NodeID: 1, } nodes := []*NodeInfo{info} err = cluster.Startup(ctx, nodes) assert.Nil(t, err) err = cluster.Watch("chan-1", 1) assert.NoError(t, err) // flush empty should impact nothing assert.NotPanics(t, func() { err := cluster.Flush(context.Background(), 1, "chan-1", []*datapb.SegmentInfo{}) assert.NoError(t, err) }) // flush not watched channel assert.NotPanics(t, func() { err := cluster.Flush(context.Background(), 1, "chan-2", []*datapb.SegmentInfo{{ID: 1}}) assert.Error(t, err) }) // flush from wrong datanode assert.NotPanics(t, func() { err := cluster.Flush(context.Background(), 2, "chan-1", []*datapb.SegmentInfo{{ID: 1}}) assert.Error(t, err) }) //TODO add a method to verify datanode has flush request after client injection is available } func TestCluster_Import(t *testing.T) { kv := getMetaKv(t) defer func() { kv.RemoveWithPrefix("") kv.Close() }() ctx, cancel := context.WithTimeout(context.TODO(), 100*time.Millisecond) defer cancel() sessionManager := NewSessionManager() channelManager, err := NewChannelManager(kv, newMockHandler()) assert.Nil(t, err) cluster := NewCluster(sessionManager, channelManager) defer cluster.Close() addr := "localhost:8080" info := &NodeInfo{ Address: addr, NodeID: 1, } nodes := []*NodeInfo{info} err = cluster.Startup(ctx, nodes) assert.Nil(t, err) err = cluster.Watch("chan-1", 1) assert.NoError(t, err) assert.NotPanics(t, func() { cluster.Import(ctx, 1, &datapb.ImportTaskRequest{}) }) time.Sleep(500 * time.Millisecond) } func TestCluster_ReCollectSegmentStats(t *testing.T) { kv := getMetaKv(t) defer func() { kv.RemoveWithPrefix("") kv.Close() }() t.Run("recollect succeed", func(t *testing.T) { ctx, cancel := context.WithCancel(context.TODO()) defer cancel() var mockSessionCreator = func(ctx context.Context, addr string) (types.DataNode, error) { return newMockDataNodeClient(1, nil) } sessionManager := NewSessionManager(withSessionCreator(mockSessionCreator)) channelManager, err := NewChannelManager(kv, newMockHandler()) assert.Nil(t, err) cluster := NewCluster(sessionManager, channelManager) defer cluster.Close() addr := "localhost:8080" info := &NodeInfo{ Address: addr, NodeID: 1, } nodes := []*NodeInfo{info} err = cluster.Startup(ctx, nodes) assert.Nil(t, err) err = cluster.Watch("chan-1", 1) assert.NoError(t, err) assert.NotPanics(t, func() { cluster.ReCollectSegmentStats(ctx, 1) }) time.Sleep(500 * time.Millisecond) }) t.Run("recollect failed", func(t *testing.T) { ctx, cancel := context.WithCancel(context.TODO()) defer cancel() sessionManager := NewSessionManager() channelManager, err := NewChannelManager(kv, newMockHandler()) assert.Nil(t, err) cluster := NewCluster(sessionManager, channelManager) defer cluster.Close() addr := "localhost:8080" info := &NodeInfo{ Address: addr, NodeID: 1, } nodes := []*NodeInfo{info} err = cluster.Startup(ctx, nodes) assert.Nil(t, err) err = cluster.Watch("chan-1", 1) assert.NoError(t, err) assert.NotPanics(t, func() { cluster.ReCollectSegmentStats(ctx, 1) }) time.Sleep(500 * time.Millisecond) }) }