milvus/internal/querycoordv2/observers/collection_observer_test.go
wei liu 266f8ef1f5
fix: Search may return less result after qn recover (#36549)
issue: #36293 #36242
after qn recover, delegator may be loaded in new node, after all segment
has been loaded, delegator becomes serviceable. but delegator's target
version hasn't been synced, and if search/query comes, delegator will
use wrong target version to filter out a empty segment list, which
caused empty search result.

This pr will block delegator's serviceable status until target version
is synced

---------

Signed-off-by: Wei Liu <wei.liu@zilliz.com>
2024-11-12 16:34:28 +08:00

499 lines
16 KiB
Go

// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package observers
import (
"context"
"testing"
"time"
"github.com/samber/lo"
"github.com/stretchr/testify/mock"
"github.com/stretchr/testify/suite"
clientv3 "go.etcd.io/etcd/client/v3"
etcdkv "github.com/milvus-io/milvus/internal/kv/etcd"
"github.com/milvus-io/milvus/internal/metastore"
"github.com/milvus-io/milvus/internal/metastore/kv/querycoord"
"github.com/milvus-io/milvus/internal/proto/datapb"
"github.com/milvus-io/milvus/internal/proto/querypb"
"github.com/milvus-io/milvus/internal/querycoordv2/checkers"
"github.com/milvus-io/milvus/internal/querycoordv2/meta"
. "github.com/milvus-io/milvus/internal/querycoordv2/params"
"github.com/milvus-io/milvus/internal/querycoordv2/session"
"github.com/milvus-io/milvus/internal/util/proxyutil"
"github.com/milvus-io/milvus/pkg/kv"
"github.com/milvus-io/milvus/pkg/log"
"github.com/milvus-io/milvus/pkg/util/etcd"
"github.com/milvus-io/milvus/pkg/util/merr"
"github.com/milvus-io/milvus/pkg/util/paramtable"
)
type CollectionObserverSuite struct {
suite.Suite
// Data
collections []int64
partitions map[int64][]int64 // CollectionID -> PartitionIDs
channels map[int64][]*meta.DmChannel
segments map[int64][]*datapb.SegmentInfo // CollectionID -> []datapb.SegmentInfo
loadTypes map[int64]querypb.LoadType
replicaNumber map[int64]int32
nodes []int64
// Mocks
idAllocator func() (int64, error)
etcd *clientv3.Client
kv kv.MetaKv
store metastore.QueryCoordCatalog
broker *meta.MockBroker
cluster *session.MockCluster
proxyManager *proxyutil.MockProxyClientManager
// Dependencies
dist *meta.DistributionManager
meta *meta.Meta
targetMgr *meta.TargetManager
targetObserver *TargetObserver
checkerController *checkers.CheckerController
nodeMgr *session.NodeManager
// Test object
ob *CollectionObserver
}
func (suite *CollectionObserverSuite) SetupSuite() {
paramtable.Init()
suite.collections = []int64{100, 101, 102, 103}
suite.partitions = map[int64][]int64{
100: {10},
101: {11, 12},
102: {13},
103: {14},
}
suite.channels = map[int64][]*meta.DmChannel{
100: {
meta.DmChannelFromVChannel(&datapb.VchannelInfo{
CollectionID: 100,
ChannelName: "100-dmc0",
}),
meta.DmChannelFromVChannel(&datapb.VchannelInfo{
CollectionID: 100,
ChannelName: "100-dmc1",
}),
},
101: {
meta.DmChannelFromVChannel(&datapb.VchannelInfo{
CollectionID: 101,
ChannelName: "101-dmc0",
}),
meta.DmChannelFromVChannel(&datapb.VchannelInfo{
CollectionID: 101,
ChannelName: "101-dmc1",
}),
},
102: {
meta.DmChannelFromVChannel(&datapb.VchannelInfo{
CollectionID: 102,
ChannelName: "102-dmc0",
}),
},
103: {
meta.DmChannelFromVChannel(&datapb.VchannelInfo{
CollectionID: 103,
ChannelName: "103-dmc0",
}),
},
}
suite.segments = map[int64][]*datapb.SegmentInfo{
100: {
&datapb.SegmentInfo{
ID: 1,
CollectionID: 100,
PartitionID: 10,
InsertChannel: "100-dmc0",
},
&datapb.SegmentInfo{
ID: 2,
CollectionID: 100,
PartitionID: 10,
InsertChannel: "100-dmc1",
},
},
101: {
&datapb.SegmentInfo{
ID: 3,
CollectionID: 101,
PartitionID: 11,
InsertChannel: "101-dmc0",
},
&datapb.SegmentInfo{
ID: 4,
CollectionID: 101,
PartitionID: 12,
InsertChannel: "101-dmc1",
},
},
102: genSegmentsInfo(999, 5, 102, 13, "102-dmc0"),
103: genSegmentsInfo(10, 2000, 103, 14, "103-dmc0"),
}
suite.loadTypes = map[int64]querypb.LoadType{
100: querypb.LoadType_LoadCollection,
101: querypb.LoadType_LoadPartition,
102: querypb.LoadType_LoadCollection,
103: querypb.LoadType_LoadCollection,
}
suite.replicaNumber = map[int64]int32{
100: 1,
101: 1,
102: 1,
103: 2,
}
suite.nodes = []int64{1, 2, 3}
suite.proxyManager = proxyutil.NewMockProxyClientManager(suite.T())
suite.proxyManager.EXPECT().InvalidateCollectionMetaCache(mock.Anything, mock.Anything, mock.Anything).Return(nil).Maybe()
}
func (suite *CollectionObserverSuite) SetupTest() {
// Mocks
var err error
suite.idAllocator = RandomIncrementIDAllocator()
log.Debug("create embedded etcd KV...")
config := GenerateEtcdConfig()
client, err := etcd.GetEtcdClient(
config.UseEmbedEtcd.GetAsBool(),
config.EtcdUseSSL.GetAsBool(),
config.Endpoints.GetAsStrings(),
config.EtcdTLSCert.GetValue(),
config.EtcdTLSKey.GetValue(),
config.EtcdTLSCACert.GetValue(),
config.EtcdTLSMinVersion.GetValue())
suite.Require().NoError(err)
suite.kv = etcdkv.NewEtcdKV(client, Params.EtcdCfg.MetaRootPath.GetValue()+"-"+RandomMetaRootPath())
suite.Require().NoError(err)
log.Debug("create meta store...")
suite.store = querycoord.NewCatalog(suite.kv)
// Dependencies
suite.dist = meta.NewDistributionManager()
suite.nodeMgr = session.NewNodeManager()
suite.meta = meta.NewMeta(suite.idAllocator, suite.store, suite.nodeMgr)
suite.broker = meta.NewMockBroker(suite.T())
suite.targetMgr = meta.NewTargetManager(suite.broker, suite.meta)
suite.cluster = session.NewMockCluster(suite.T())
suite.targetObserver = NewTargetObserver(suite.meta,
suite.targetMgr,
suite.dist,
suite.broker,
suite.cluster,
suite.nodeMgr,
)
suite.checkerController = &checkers.CheckerController{}
mockCluster := session.NewMockCluster(suite.T())
mockCluster.EXPECT().SyncDistribution(mock.Anything, mock.Anything, mock.Anything).Return(merr.Success(), nil).Maybe()
// Test object
suite.ob = NewCollectionObserver(
suite.dist,
suite.meta,
suite.targetMgr,
suite.targetObserver,
suite.checkerController,
suite.proxyManager,
)
for _, collection := range suite.collections {
suite.broker.EXPECT().GetPartitions(mock.Anything, collection).Return(suite.partitions[collection], nil).Maybe()
}
suite.targetObserver.Start()
suite.ob.Start()
suite.loadAll()
suite.nodeMgr.Add(session.NewNodeInfo(session.ImmutableNodeInfo{
NodeID: 1,
}))
suite.nodeMgr.Add(session.NewNodeInfo(session.ImmutableNodeInfo{
NodeID: 2,
}))
suite.nodeMgr.Add(session.NewNodeInfo(session.ImmutableNodeInfo{
NodeID: 3,
}))
}
func (suite *CollectionObserverSuite) TearDownTest() {
suite.ob.Stop()
suite.targetObserver.Stop()
suite.kv.Close()
}
func (suite *CollectionObserverSuite) TestObserve() {
const (
timeout = 3 * time.Second
)
// time before load
time := suite.meta.GetCollection(suite.collections[2]).UpdatedAt
// Not timeout
paramtable.Get().Save(Params.QueryCoordCfg.LoadTimeoutSeconds.Key, "3")
// Collection 100 loaded before timeout,
// collection 101 timeout
suite.dist.LeaderViewManager.Update(1, &meta.LeaderView{
ID: 1,
CollectionID: 100,
Channel: "100-dmc0",
Segments: map[int64]*querypb.SegmentDist{1: {NodeID: 1, Version: 0}},
})
view := &meta.LeaderView{
ID: 2,
CollectionID: 103,
Channel: "103-dmc0",
Segments: make(map[int64]*querypb.SegmentDist),
}
suite.dist.LeaderViewManager.Update(2, &meta.LeaderView{
ID: 2,
CollectionID: 100,
Channel: "100-dmc1",
Segments: map[int64]*querypb.SegmentDist{2: {NodeID: 2, Version: 0}},
}, view)
view1 := &meta.LeaderView{
ID: 3,
CollectionID: 102,
Channel: "102-dmc0",
Segments: map[int64]*querypb.SegmentDist{2: {NodeID: 5, Version: 0}},
}
segmentsInfo, ok := suite.segments[103]
suite.True(ok)
view2 := &meta.LeaderView{
ID: 3,
CollectionID: 103,
Channel: "103-dmc0",
Segments: make(map[int64]*querypb.SegmentDist),
}
for _, segment := range segmentsInfo {
view2.Segments[segment.GetID()] = &querypb.SegmentDist{
NodeID: 3, Version: 0,
}
view.Segments[segment.GetID()] = &querypb.SegmentDist{
NodeID: 2, Version: 0,
}
}
suite.dist.LeaderViewManager.Update(3, view1, view2)
suite.broker.EXPECT().DescribeCollection(mock.Anything, mock.Anything).Return(nil, nil).Maybe()
suite.broker.EXPECT().ListIndexes(mock.Anything, mock.Anything).Return(nil, nil).Maybe()
suite.cluster.EXPECT().SyncDistribution(mock.Anything, mock.Anything, mock.Anything).Return(merr.Success(), nil).Maybe()
suite.Eventually(func() bool {
return suite.isCollectionLoadedContinue(suite.collections[2], time)
}, timeout-1, timeout/10)
suite.Eventually(func() bool {
return suite.isCollectionLoaded(suite.collections[0])
}, timeout*2, timeout/10)
suite.Eventually(func() bool {
return suite.isCollectionTimeout(suite.collections[1])
}, timeout*2, timeout/10)
suite.Eventually(func() bool {
return suite.isCollectionLoaded(suite.collections[3])
}, timeout*2, timeout/10)
}
func (suite *CollectionObserverSuite) TestObservePartition() {
const (
timeout = 3 * time.Second
)
paramtable.Get().Save(Params.QueryCoordCfg.LoadTimeoutSeconds.Key, "3")
// Partition 10 loaded
// Partition 11 timeout
suite.dist.LeaderViewManager.Update(1, &meta.LeaderView{
ID: 1,
CollectionID: 100,
Channel: "100-dmc0",
Segments: map[int64]*querypb.SegmentDist{1: {NodeID: 1, Version: 0}},
}, &meta.LeaderView{
ID: 1,
CollectionID: 101,
Channel: "",
Segments: map[int64]*querypb.SegmentDist{},
})
suite.dist.LeaderViewManager.Update(2, &meta.LeaderView{
ID: 2,
CollectionID: 100,
Channel: "100-dmc1",
Segments: map[int64]*querypb.SegmentDist{2: {NodeID: 2, Version: 0}},
})
suite.Eventually(func() bool {
return suite.isPartitionLoaded(suite.partitions[100][0])
}, timeout*2, timeout/10)
suite.Eventually(func() bool {
return suite.isPartitionTimeout(suite.collections[1], suite.partitions[101][0])
}, timeout*2, timeout/10)
}
func (suite *CollectionObserverSuite) isCollectionLoaded(collection int64) bool {
exist := suite.meta.Exist(collection)
percentage := suite.meta.CalculateLoadPercentage(collection)
status := suite.meta.CalculateLoadStatus(collection)
replicas := suite.meta.ReplicaManager.GetByCollection(collection)
channels := suite.targetMgr.GetDmChannelsByCollection(collection, meta.CurrentTarget)
segments := suite.targetMgr.GetSealedSegmentsByCollection(collection, meta.CurrentTarget)
return exist &&
percentage == 100 &&
status == querypb.LoadStatus_Loaded &&
len(replicas) == int(suite.replicaNumber[collection]) &&
len(channels) == len(suite.channels[collection]) &&
len(segments) == len(suite.segments[collection])
}
func (suite *CollectionObserverSuite) isPartitionLoaded(partitionID int64) bool {
partition := suite.meta.GetPartition(partitionID)
if partition == nil {
return false
}
collection := partition.GetCollectionID()
percentage := suite.meta.GetPartitionLoadPercentage(partitionID)
status := partition.GetStatus()
channels := suite.targetMgr.GetDmChannelsByCollection(collection, meta.CurrentTarget)
segments := suite.targetMgr.GetSealedSegmentsByPartition(collection, partitionID, meta.CurrentTarget)
expectedSegments := lo.Filter(suite.segments[collection], func(seg *datapb.SegmentInfo, _ int) bool {
return seg.PartitionID == partitionID
})
return percentage == 100 &&
status == querypb.LoadStatus_Loaded &&
len(channels) == len(suite.channels[collection]) &&
len(segments) == len(expectedSegments)
}
func (suite *CollectionObserverSuite) isCollectionTimeout(collection int64) bool {
exist := suite.meta.Exist(collection)
replicas := suite.meta.ReplicaManager.GetByCollection(collection)
channels := suite.targetMgr.GetDmChannelsByCollection(collection, meta.CurrentTarget)
segments := suite.targetMgr.GetSealedSegmentsByCollection(collection, meta.CurrentTarget)
return !(exist ||
len(replicas) > 0 ||
len(channels) > 0 ||
len(segments) > 0)
}
func (suite *CollectionObserverSuite) isPartitionTimeout(collection int64, partitionID int64) bool {
partition := suite.meta.GetPartition(partitionID)
segments := suite.targetMgr.GetSealedSegmentsByPartition(collection, partitionID, meta.CurrentTarget)
return partition == nil && len(segments) == 0
}
func (suite *CollectionObserverSuite) isCollectionLoadedContinue(collection int64, beforeTime time.Time) bool {
return suite.meta.GetCollection(collection).UpdatedAt.After(beforeTime)
}
func (suite *CollectionObserverSuite) loadAll() {
for _, collection := range suite.collections {
suite.load(collection)
}
suite.targetMgr.UpdateCollectionCurrentTarget(suite.collections[0])
suite.targetMgr.UpdateCollectionNextTarget(suite.collections[0])
suite.targetMgr.UpdateCollectionCurrentTarget(suite.collections[2])
suite.targetMgr.UpdateCollectionNextTarget(suite.collections[2])
}
func (suite *CollectionObserverSuite) load(collection int64) {
// Mock meta data
replicas, err := suite.meta.ReplicaManager.Spawn(collection, map[string]int{meta.DefaultResourceGroupName: int(suite.replicaNumber[collection])}, nil)
suite.NoError(err)
for _, replica := range replicas {
replica.AddRWNode(suite.nodes...)
}
err = suite.meta.ReplicaManager.Put(replicas...)
suite.NoError(err)
suite.meta.PutCollection(&meta.Collection{
CollectionLoadInfo: &querypb.CollectionLoadInfo{
CollectionID: collection,
ReplicaNumber: suite.replicaNumber[collection],
Status: querypb.LoadStatus_Loading,
LoadType: suite.loadTypes[collection],
},
LoadPercentage: 0,
CreatedAt: time.Now(),
})
for _, partition := range suite.partitions[collection] {
suite.meta.PutPartition(&meta.Partition{
PartitionLoadInfo: &querypb.PartitionLoadInfo{
CollectionID: collection,
PartitionID: partition,
ReplicaNumber: suite.replicaNumber[collection],
Status: querypb.LoadStatus_Loading,
},
LoadPercentage: 0,
CreatedAt: time.Now(),
})
}
dmChannels := make([]*datapb.VchannelInfo, 0)
for _, channel := range suite.channels[collection] {
dmChannels = append(dmChannels, &datapb.VchannelInfo{
CollectionID: collection,
ChannelName: channel.GetChannelName(),
})
}
allSegments := make([]*datapb.SegmentInfo, 0) // partitionID -> segments
for _, segment := range suite.segments[collection] {
allSegments = append(allSegments, &datapb.SegmentInfo{
ID: segment.GetID(),
PartitionID: segment.PartitionID,
InsertChannel: segment.GetInsertChannel(),
})
}
suite.broker.EXPECT().GetRecoveryInfoV2(mock.Anything, collection).Return(dmChannels, allSegments, nil)
suite.targetMgr.UpdateCollectionNextTarget(collection)
suite.ob.LoadCollection(context.Background(), collection)
}
func TestCollectionObserver(t *testing.T) {
suite.Run(t, new(CollectionObserverSuite))
}
func genSegmentsInfo(count int, start int, collID int64, partitionID int64, insertChannel string) []*datapb.SegmentInfo {
ret := make([]*datapb.SegmentInfo, 0, count)
for i := 0; i < count; i++ {
segment := &datapb.SegmentInfo{
ID: int64(start + i),
CollectionID: collID,
PartitionID: partitionID,
InsertChannel: insertChannel,
}
ret = append(ret, segment)
}
return ret
}