fix: Wrong shard leader chosen during channel balance (#29529)

issue: #29523

During channel balance, the readable shard leader should remain the old
one if the new shard leader is not ready.
This PR fixes query coord choosing the wrong shard leader during channel
balance.

Signed-off-by: Wei Liu <wei.liu@zilliz.com>
This commit is contained in:
wei liu 2023-12-28 15:22:51 +08:00 committed by GitHub
parent 6597c72992
commit 5474bce9d2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -881,9 +881,8 @@ func (s *Server) GetShardLeaders(ctx context.Context, req *querypb.GetShardLeade
log := log.With(zap.String("channel", channel.GetChannelName()))
leaders := s.dist.LeaderViewManager.GetLeadersByShard(channel.GetChannelName())
leaders = filterDupLeaders(s.meta.ReplicaManager, leaders)
ids := make([]int64, 0, len(leaders))
addrs := make([]string, 0, len(leaders))
readableLeaders := make(map[int64]*meta.LeaderView)
var channelErr error
if len(leaders) == 0 {
@ -944,11 +943,10 @@ func (s *Server) GetShardLeaders(ctx context.Context, req *querypb.GetShardLeade
continue
}
ids = append(ids, info.ID())
addrs = append(addrs, info.Addr())
readableLeaders[leader.ID] = leader
}
if len(ids) == 0 {
if len(readableLeaders) == 0 {
msg := fmt.Sprintf("channel %s is not available in any replica", channel.GetChannelName())
log.Warn(msg, zap.Error(channelErr))
resp.Status = merr.Status(
@ -957,6 +955,15 @@ func (s *Server) GetShardLeaders(ctx context.Context, req *querypb.GetShardLeade
return resp, nil
}
readableLeaders = filterDupLeaders(s.meta.ReplicaManager, readableLeaders)
ids := make([]int64, 0, len(leaders))
addrs := make([]string, 0, len(leaders))
for _, leader := range readableLeaders {
info := s.nodeMgr.Get(leader.ID)
ids = append(ids, info.ID())
addrs = append(addrs, info.Addr())
}
resp.Shards = append(resp.Shards, &querypb.ShardLeadersList{
ChannelName: channel.GetChannelName(),
NodeIds: ids,