fix: Balance channel may get stuck when the replica number is increased (#37641)

issue: #37640
This is a follow-up fix for PR #36549.
Cause: balance channel waits until the new delegator becomes serviceable,
but the new delegator needs to sync the target version before it becomes
serviceable, and syncing the target version has to wait until all replicas
have finished loading. So if the replica number is increased while a
balance channel operation is in progress, a logical deadlock occurs.

Signed-off-by: Wei Liu <wei.liu@zilliz.com>
This commit is contained in:
wei liu 2024-11-14 10:08:31 +08:00 committed by GitHub
parent caf207f5a0
commit 1304b40552
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -388,9 +388,8 @@ func (ob *TargetObserver) shouldUpdateCurrentTarget(ctx context.Context, collect
})
collectionReadyLeaders = append(collectionReadyLeaders, channelReadyLeaders...)
nodes := lo.Map(channelReadyLeaders, func(view *meta.LeaderView, _ int) int64 { return view.ID })
group := utils.GroupNodesByReplica(ob.meta.ReplicaManager, collectionID, nodes)
if int32(len(group)) < replicaNum {
// To avoid getting stuck here when the replica number is increased dynamically, only check the number of available delegators.
if int32(len(collectionReadyLeaders)) < replicaNum {
log.RatedInfo(10, "channel not ready",
zap.Int("readyReplicaNum", len(channelReadyLeaders)),
zap.String("channelName", channel),