// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package querycoordv2

import (
	"context"
	"errors"
	"fmt"
	"sync"
	"time"

	"github.com/milvus-io/milvus-proto/go-api/commonpb"
	"github.com/milvus-io/milvus-proto/go-api/milvuspb"
	"github.com/milvus-io/milvus/internal/log"
	"github.com/milvus-io/milvus/internal/metrics"
	"github.com/milvus-io/milvus/internal/proto/internalpb"
	"github.com/milvus-io/milvus/internal/proto/querypb"
	"github.com/milvus-io/milvus/internal/querycoordv2/job"
	"github.com/milvus-io/milvus/internal/querycoordv2/meta"
	"github.com/milvus-io/milvus/internal/querycoordv2/utils"
	"github.com/milvus-io/milvus/internal/util/errorutil"
	"github.com/milvus-io/milvus/internal/util/metricsinfo"
	"github.com/milvus-io/milvus/internal/util/paramtable"
	"github.com/milvus-io/milvus/internal/util/timerecord"
	"github.com/milvus-io/milvus/internal/util/typeutil"
	"github.com/samber/lo"
	"go.uber.org/multierr"
	"go.uber.org/zap"
	"golang.org/x/sync/errgroup"
)

var (
	successStatus = utils.WrapStatus(commonpb.ErrorCode_Success, "")
)

func (s *Server) ShowCollections(ctx context.Context, req *querypb.ShowCollectionsRequest) (*querypb.ShowCollectionsResponse, error) {
	log.Ctx(ctx).Info("show collections request received", zap.Int64s("collections", req.GetCollectionIDs()))

	if s.status.Load() != commonpb.StateCode_Healthy {
		msg := "failed to show collections"
		log.Warn(msg, zap.Error(ErrNotHealthy))
		return &querypb.ShowCollectionsResponse{
			Status: utils.WrapStatus(commonpb.ErrorCode_UnexpectedError, msg, ErrNotHealthy),
		}, nil
	}
	defer meta.GlobalFailedLoadCache.TryExpire()

	isGetAll := false
	collectionSet := typeutil.NewUniqueSet(req.GetCollectionIDs()...)
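	// No collection IDs given: treat this as a "show all" request and
	// collect every loaded collection and partition.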
	if len(req.GetCollectionIDs()) == 0 {
		for _, collection := range s.meta.GetAllCollections() {
			collectionSet.Insert(collection.GetCollectionID())
		}
		for _, partition := range s.meta.GetAllPartitions() {
			collectionSet.Insert(partition.GetCollectionID())
		}
		isGetAll = true
	}
	collections := collectionSet.Collect()

	resp := &querypb.ShowCollectionsResponse{
		Status:                successStatus,
		CollectionIDs:         make([]int64, 0, len(collectionSet)),
		InMemoryPercentages:   make([]int64, 0, len(collectionSet)),
		QueryServiceAvailable: make([]bool, 0, len(collectionSet)),
	}
	for _, collectionID := range collections {
		log := log.With(zap.Int64("collectionID", collectionID))

		percentage := s.meta.CollectionManager.GetLoadPercentage(collectionID)
		if percentage < 0 {
			if isGetAll {
				// The collection is released during this request, ignore it
				continue
			}
			status := meta.GlobalFailedLoadCache.Get(collectionID)
			if status.ErrorCode != commonpb.ErrorCode_Success {
				log.Warn("show collection failed",
					zap.String("errCode", status.GetErrorCode().String()),
					zap.String("reason", status.GetReason()))
				return &querypb.ShowCollectionsResponse{
					Status: status,
				}, nil
			}
			err := fmt.Errorf("collection %d has not been loaded to memory or load failed", collectionID)
			log.Warn("show collection failed", zap.Error(err))
			return &querypb.ShowCollectionsResponse{
				Status: utils.WrapStatus(commonpb.ErrorCode_UnexpectedError, err.Error()),
			}, nil
		}
		resp.CollectionIDs = append(resp.CollectionIDs, collectionID)
		resp.InMemoryPercentages = append(resp.InMemoryPercentages, int64(percentage))
		resp.QueryServiceAvailable = append(resp.QueryServiceAvailable, s.checkAnyReplicaAvailable(collectionID))
	}

	return resp, nil
}

func (s *Server) ShowPartitions(ctx context.Context, req *querypb.ShowPartitionsRequest) (*querypb.ShowPartitionsResponse, error) {
	log := log.Ctx(ctx).With(
		zap.Int64("collectionID", req.GetCollectionID()),
	)

	log.Info("show partitions request received", zap.Int64s("partitions", req.GetPartitionIDs()))

	if s.status.Load() != commonpb.StateCode_Healthy {
		msg := "failed to show partitions"
		log.Warn(msg, zap.Error(ErrNotHealthy))
		return &querypb.ShowPartitionsResponse{
			Status: utils.WrapStatus(commonpb.ErrorCode_UnexpectedError, msg, ErrNotHealthy),
		}, nil
	}
	defer meta.GlobalFailedLoadCache.TryExpire()

	// TODO(yah01): now, for load collection, the percentage of a partition is equal to the percentage of the collection,
	// we can calculate the real percentage of partitions
	partitions := req.GetPartitionIDs()
	percentages := make([]int64, 0)
	isReleased := false
	switch s.meta.GetLoadType(req.GetCollectionID()) {
	case querypb.LoadType_LoadCollection:
		percentage := s.meta.GetLoadPercentage(req.GetCollectionID())
		if percentage < 0 {
			isReleased = true
			break
		}

		if len(partitions) == 0 {
			var err error
			partitions, err = s.broker.GetPartitions(ctx, req.GetCollectionID())
			if err != nil {
				msg := "failed to show partitions"
				log.Warn(msg, zap.Error(err))
				return &querypb.ShowPartitionsResponse{
					Status: utils.WrapStatus(commonpb.ErrorCode_UnexpectedError, msg, err),
				}, nil
			}
		}
		for range partitions {
			percentages = append(percentages, int64(percentage))
		}
	case querypb.LoadType_LoadPartition:
		if len(partitions) == 0 {
			partitions = lo.Map(s.meta.GetPartitionsByCollection(req.GetCollectionID()), func(partition *meta.Partition, _ int) int64 {
				return partition.GetPartitionID()
			})
		}
		for _, partitionID := range partitions {
			partition := s.meta.GetPartition(partitionID)
			if partition == nil {
				isReleased = true
				break
			}
			percentages = append(percentages, int64(partition.LoadPercentage))
		}
	default:
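		// Unknown load type: the collection has never been loaded,
		// or has already been released.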
		isReleased = true
	}

	if isReleased {
		status := meta.GlobalFailedLoadCache.Get(req.GetCollectionID())
		if status.ErrorCode != commonpb.ErrorCode_Success {
			log.Warn("show partitions failed",
				zap.String("errCode", status.GetErrorCode().String()),
				zap.String("reason", status.GetReason()))
			return &querypb.ShowPartitionsResponse{
				Status: status,
			}, nil
		}
		msg := fmt.Sprintf("collection %v has not been loaded into QueryNode", req.GetCollectionID())
		log.Warn(msg)
		return &querypb.ShowPartitionsResponse{
			Status: utils.WrapStatus(commonpb.ErrorCode_UnexpectedError, msg),
		}, nil
	}

	return &querypb.ShowPartitionsResponse{
		Status:              successStatus,
		PartitionIDs:        partitions,
		InMemoryPercentages: percentages,
	}, nil
}

func (s *Server) LoadCollection(ctx context.Context, req *querypb.LoadCollectionRequest) (*commonpb.Status, error) {
	log := log.Ctx(ctx).With(
		zap.Int64("collectionID", req.GetCollectionID()),
	)

	log.Info("load collection request received",
		zap.Any("schema", req.Schema),
		zap.Int32("replicaNumber", req.ReplicaNumber),
		zap.Int64s("fieldIndexes", lo.Values(req.GetFieldIndexID())),
	)
	metrics.QueryCoordLoadCount.WithLabelValues(metrics.TotalLabel).Inc()

	if s.status.Load() != commonpb.StateCode_Healthy {
		msg := "failed to load collection"
		log.Warn(msg, zap.Error(ErrNotHealthy))
		metrics.QueryCoordLoadCount.WithLabelValues(metrics.FailLabel).Inc()
		return utils.WrapStatus(commonpb.ErrorCode_UnexpectedError, msg, ErrNotHealthy), nil
	}

	// If refresh mode is ON.
	if req.GetRefresh() {
		return s.refreshCollection(ctx, req.GetCollectionID())
	}

	loadJob := job.NewLoadCollectionJob(ctx,
		req,
		s.dist,
		s.meta,
		s.targetMgr,
		s.broker,
		s.nodeMgr,
	)
	s.jobScheduler.Add(loadJob)
	err := loadJob.Wait()
	if err != nil && !errors.Is(err, job.ErrCollectionLoaded) {
		msg := "failed to load collection"
		log.Warn(msg, zap.Error(err))
		metrics.QueryCoordLoadCount.WithLabelValues(metrics.FailLabel).Inc()
		return utils.WrapStatus(errCode(err), msg, err), nil
	}

	metrics.QueryCoordLoadCount.WithLabelValues(metrics.SuccessLabel).Inc()
	return successStatus, nil
}

func (s *Server) ReleaseCollection(ctx context.Context, req *querypb.ReleaseCollectionRequest) (*commonpb.Status, error) {
	log := log.Ctx(ctx).With(
		zap.Int64("collectionID", req.GetCollectionID()),
	)

	log.Info("release collection request received")
	metrics.QueryCoordReleaseCount.WithLabelValues(metrics.TotalLabel).Inc()
	tr := timerecord.NewTimeRecorder("release-collection")

	if s.status.Load() != commonpb.StateCode_Healthy {
		msg := "failed to release collection"
		log.Warn(msg, zap.Error(ErrNotHealthy))
		metrics.QueryCoordReleaseCount.WithLabelValues(metrics.FailLabel).Inc()
		return utils.WrapStatus(commonpb.ErrorCode_UnexpectedError, msg, ErrNotHealthy), nil
	}

	releaseJob := job.NewReleaseCollectionJob(ctx,
		req,
		s.dist,
		s.meta,
		s.targetMgr,
		s.targetObserver,
	)
	s.jobScheduler.Add(releaseJob)
	err := releaseJob.Wait()
	if err != nil {
		msg := "failed to release collection"
		log.Error(msg, zap.Error(err))
		metrics.QueryCoordReleaseCount.WithLabelValues(metrics.FailLabel).Inc()
		return utils.WrapStatus(commonpb.ErrorCode_UnexpectedError, msg, err), nil
	}

	log.Info("collection released")
	metrics.QueryCoordReleaseCount.WithLabelValues(metrics.SuccessLabel).Inc()
	metrics.QueryCoordReleaseLatency.WithLabelValues().Observe(float64(tr.ElapseSpan().Milliseconds()))
	meta.GlobalFailedLoadCache.Remove(req.GetCollectionID())

	return successStatus, nil
}
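// LoadPartitions loads the given partitions, submitting a load-partition job
// to the scheduler and waiting for it to finish.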
func (s *Server) LoadPartitions(ctx context.Context, req *querypb.LoadPartitionsRequest) (*commonpb.Status, error) {
	log := log.Ctx(ctx).With(
		zap.Int64("collectionID", req.GetCollectionID()),
	)

	log.Info("received load partitions request",
		zap.Any("schema", req.Schema),
		zap.Int32("replicaNumber", req.ReplicaNumber),
		zap.Int64s("partitions", req.GetPartitionIDs()))
	metrics.QueryCoordLoadCount.WithLabelValues(metrics.TotalLabel).Inc()

	if s.status.Load() != commonpb.StateCode_Healthy {
		msg := "failed to load partitions"
		log.Warn(msg, zap.Error(ErrNotHealthy))
		metrics.QueryCoordLoadCount.WithLabelValues(metrics.FailLabel).Inc()
		return utils.WrapStatus(commonpb.ErrorCode_UnexpectedError, msg, ErrNotHealthy), nil
	}

	// If refresh mode is ON.
	if req.GetRefresh() {
		return s.refreshPartitions(ctx, req.GetCollectionID(), req.GetPartitionIDs())
	}

	loadJob := job.NewLoadPartitionJob(ctx,
		req,
		s.dist,
		s.meta,
		s.targetMgr,
		s.broker,
		s.nodeMgr,
	)
	s.jobScheduler.Add(loadJob)
	err := loadJob.Wait()
	if err != nil && !errors.Is(err, job.ErrCollectionLoaded) {
		msg := "failed to load partitions"
		log.Warn(msg, zap.Error(err))
		metrics.QueryCoordLoadCount.WithLabelValues(metrics.FailLabel).Inc()
		return utils.WrapStatus(errCode(err), msg, err), nil
	}

	metrics.QueryCoordLoadCount.WithLabelValues(metrics.SuccessLabel).Inc()
	return successStatus, nil
}

func (s *Server) ReleasePartitions(ctx context.Context, req *querypb.ReleasePartitionsRequest) (*commonpb.Status, error) {
	log := log.Ctx(ctx).With(
		zap.Int64("collectionID", req.GetCollectionID()),
	)

	log.Info("release partitions", zap.Int64s("partitions", req.GetPartitionIDs()))
	metrics.QueryCoordReleaseCount.WithLabelValues(metrics.TotalLabel).Inc()

	if s.status.Load() != commonpb.StateCode_Healthy {
		msg := "failed to release partitions"
		log.Warn(msg, zap.Error(ErrNotHealthy))
		metrics.QueryCoordReleaseCount.WithLabelValues(metrics.FailLabel).Inc()
		return utils.WrapStatus(commonpb.ErrorCode_UnexpectedError, msg, ErrNotHealthy), nil
	}

	if len(req.GetPartitionIDs()) == 0 {
		msg := "partitions is empty"
		log.Warn(msg)
		metrics.QueryCoordReleaseCount.WithLabelValues(metrics.FailLabel).Inc()
		return utils.WrapStatus(commonpb.ErrorCode_UnexpectedError, msg), nil
	}

	tr := timerecord.NewTimeRecorder("release-partitions")
	releaseJob := job.NewReleasePartitionJob(ctx,
		req,
		s.dist,
		s.meta,
		s.targetMgr,
		s.targetObserver,
	)
	s.jobScheduler.Add(releaseJob)
	err := releaseJob.Wait()
	if err != nil {
		msg := "failed to release partitions"
		log.Error(msg, zap.Error(err))
		metrics.QueryCoordReleaseCount.WithLabelValues(metrics.FailLabel).Inc()
		return utils.WrapStatus(commonpb.ErrorCode_UnexpectedError, msg, err), nil
	}

	metrics.QueryCoordReleaseCount.WithLabelValues(metrics.SuccessLabel).Inc()
	metrics.QueryCoordReleaseLatency.WithLabelValues().Observe(float64(tr.ElapseSpan().Milliseconds()))
	meta.GlobalFailedLoadCache.Remove(req.GetCollectionID())

	return successStatus, nil
}

func (s *Server) GetPartitionStates(ctx context.Context, req *querypb.GetPartitionStatesRequest) (*querypb.GetPartitionStatesResponse, error) {
	log := log.Ctx(ctx).With(
		zap.Int64("collectionID", req.GetCollectionID()),
	)

	log.Info("get partition states", zap.Int64s("partitions", req.GetPartitionIDs()))

	if s.status.Load() != commonpb.StateCode_Healthy {
		msg := "failed to get partition states"
		log.Warn(msg, zap.Error(ErrNotHealthy))
		return &querypb.GetPartitionStatesResponse{
			Status: utils.WrapStatus(commonpb.ErrorCode_UnexpectedError, msg, ErrNotHealthy),
		}, nil
	}

	msg := "partition not loaded"
	notLoadResp := &querypb.GetPartitionStatesResponse{
		Status: utils.WrapStatus(commonpb.ErrorCode_UnexpectedError, msg),
	}
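	// Derive each partition's state from the collection's load type:
	// a fully loaded entity reports InMemory, otherwise PartialInMemory.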
	states := make([]*querypb.PartitionStates, 0, len(req.GetPartitionIDs()))
	switch s.meta.GetLoadType(req.GetCollectionID()) {
	case querypb.LoadType_LoadCollection:
		collection := s.meta.GetCollection(req.GetCollectionID())
		state := querypb.PartitionState_PartialInMemory
		if collection.LoadPercentage >= 100 {
			state = querypb.PartitionState_InMemory
		}
		releasedPartitions := typeutil.NewUniqueSet(collection.GetReleasedPartitions()...)
		for _, partition := range req.GetPartitionIDs() {
			if releasedPartitions.Contain(partition) {
				log.Warn(msg)
				return notLoadResp, nil
			}
			states = append(states, &querypb.PartitionStates{
				PartitionID: partition,
				State:       state,
			})
		}
	case querypb.LoadType_LoadPartition:
		for _, partitionID := range req.GetPartitionIDs() {
			partition := s.meta.GetPartition(partitionID)
			if partition == nil {
				log.Warn(msg, zap.Int64("partition", partitionID))
				return notLoadResp, nil
			}
			state := querypb.PartitionState_PartialInMemory
			if partition.LoadPercentage >= 100 {
				state = querypb.PartitionState_InMemory
			}
			states = append(states, &querypb.PartitionStates{
				PartitionID: partitionID,
				State:       state,
			})
		}
	default:
		log.Warn(msg)
		return notLoadResp, nil
	}

	return &querypb.GetPartitionStatesResponse{
		Status:                successStatus,
		PartitionDescriptions: states,
	}, nil
}

func (s *Server) GetSegmentInfo(ctx context.Context, req *querypb.GetSegmentInfoRequest) (*querypb.GetSegmentInfoResponse, error) {
	log := log.Ctx(ctx).With(
		zap.Int64("collectionID", req.GetCollectionID()),
	)

	log.Info("get segment info", zap.Int64s("segments", req.GetSegmentIDs()))

	if s.status.Load() != commonpb.StateCode_Healthy {
		msg := "failed to get segment info"
		log.Warn(msg, zap.Error(ErrNotHealthy))
		return &querypb.GetSegmentInfoResponse{
			Status: utils.WrapStatus(commonpb.ErrorCode_UnexpectedError, msg, ErrNotHealthy),
		}, nil
	}

	infos := make([]*querypb.SegmentInfo, 0, len(req.GetSegmentIDs()))
	if len(req.GetSegmentIDs()) == 0 {
		infos = s.getCollectionSegmentInfo(req.GetCollectionID())
	} else {
		for _, segmentID := range req.GetSegmentIDs() {
			segments := s.dist.SegmentDistManager.Get(segmentID)
			if len(segments) == 0 {
				msg := fmt.Sprintf("segment %v not found in any node", segmentID)
				log.Warn(msg, zap.Int64("segment", segmentID))
				return &querypb.GetSegmentInfoResponse{
					Status: utils.WrapStatus(commonpb.ErrorCode_UnexpectedError, msg),
				}, nil
			}
			info := &querypb.SegmentInfo{}
			utils.MergeMetaSegmentIntoSegmentInfo(info, segments...)
			infos = append(infos, info)
		}
	}

	return &querypb.GetSegmentInfoResponse{
		Status: successStatus,
		Infos:  infos,
	}, nil
}

// refreshCollection must be called after loading a collection. It looks for new segments that are not loaded yet and
// tries to load them up. It returns when all segments of the given collection are loaded, or when an error occurs.
// Note that a collection's loading progress always stays at 100% after a successful load and will not get updated
// during refreshCollection.
func (s *Server) refreshCollection(ctx context.Context, collID int64) (*commonpb.Status, error) {
	ctx, cancel := context.WithTimeout(ctx, Params.QueryCoordCfg.LoadTimeoutSeconds.GetAsDuration(time.Second))
	defer cancel()

	log := log.Ctx(ctx).With(
		zap.Int64("collectionID", collID),
	)

	if s.status.Load() != commonpb.StateCode_Healthy {
		msg := "failed to refresh collection"
		log.Warn(msg, zap.Error(ErrNotHealthy))
		metrics.QueryCoordReleaseCount.WithLabelValues(metrics.FailLabel).Inc()
		return utils.WrapStatus(commonpb.ErrorCode_UnexpectedError, msg, ErrNotHealthy), nil
	}

	// Check that collection is fully loaded.
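	// Refreshing while the initial load is still in progress is rejected.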
	if s.meta.CollectionManager.GetLoadPercentage(collID) != 100 {
		errMsg := "a collection must be fully loaded before refreshing"
		log.Warn(errMsg)
		return &commonpb.Status{
			ErrorCode: commonpb.ErrorCode_UnexpectedError,
			Reason:    errMsg,
		}, nil
	}

	// Pull the latest target.
	readyCh, err := s.targetObserver.UpdateNextTarget(collID)
	if err != nil {
		log.Warn("failed to update next target", zap.Error(err))
		return &commonpb.Status{
			ErrorCode: commonpb.ErrorCode_UnexpectedError,
			Reason:    err.Error(),
		}, nil
	}

	select {
	case <-ctx.Done():
		log.Warn("refresh collection failed as context canceled")
		return &commonpb.Status{
			ErrorCode: commonpb.ErrorCode_UnexpectedError,
			Reason:    "context canceled",
		}, nil
	case <-readyCh:
		log.Info("refresh collection succeeded")
		return &commonpb.Status{
			ErrorCode: commonpb.ErrorCode_Success,
		}, nil
	}
}

// refreshPartitions must be called after loading partitions. It looks for new segments that are not loaded yet and
// tries to load them up. It returns when all segments of the given partitions are loaded, or when an error occurs.
// Note that a collection's loading progress always stays at 100% after a successful load and will not get updated
// during refreshPartitions.
func (s *Server) refreshPartitions(ctx context.Context, collID int64, partIDs []int64) (*commonpb.Status, error) {
	ctx, cancel := context.WithTimeout(ctx, Params.QueryCoordCfg.LoadTimeoutSeconds.GetAsDuration(time.Second))
	defer cancel()

	log := log.Ctx(ctx).With(
		zap.Int64("collectionID", collID),
		zap.Int64s("partitionIDs", partIDs),
	)

	if s.status.Load() != commonpb.StateCode_Healthy {
		msg := "failed to refresh partitions"
		log.Warn(msg, zap.Error(ErrNotHealthy))
		metrics.QueryCoordReleaseCount.WithLabelValues(metrics.FailLabel).Inc()
		return utils.WrapStatus(commonpb.ErrorCode_UnexpectedError, msg, ErrNotHealthy), nil
	}

	// Check that all partitions are fully loaded.
	if s.meta.CollectionManager.GetLoadPercentage(collID) != 100 {
		errMsg := "partitions must be fully loaded before refreshing"
		log.Warn(errMsg)
		return &commonpb.Status{
			ErrorCode: commonpb.ErrorCode_UnexpectedError,
			Reason:    errMsg,
		}, nil
	}

	// Pull the latest target.
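	// The channel returned by UpdateNextTarget is signaled once the updated
	// target is ready.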
	readyCh, err := s.targetObserver.UpdateNextTarget(collID)
	if err != nil {
		log.Warn("failed to update next target", zap.Error(err))
		return &commonpb.Status{
			ErrorCode: commonpb.ErrorCode_UnexpectedError,
			Reason:    err.Error(),
		}, nil
	}

	select {
	case <-ctx.Done():
		log.Warn("refresh partitions failed as context canceled")
		return &commonpb.Status{
			ErrorCode: commonpb.ErrorCode_UnexpectedError,
			Reason:    "context canceled",
		}, nil
	case <-readyCh:
		log.Info("refresh partitions succeeded")
		return &commonpb.Status{
			ErrorCode: commonpb.ErrorCode_Success,
		}, nil
	}
}

func (s *Server) isStoppingNode(nodeID int64) error {
	isStopping, err := s.nodeMgr.IsStoppingNode(nodeID)
	if err != nil {
		log.Warn("failed to check whether the node is stopping",
			zap.Int64("node_id", nodeID), zap.Error(err))
		return err
	}
	if isStopping {
		msg := fmt.Sprintf("failed to balance because the source/destination node[%d] is stopping", nodeID)
		log.Warn(msg)
		return errors.New(msg)
	}
	return nil
}

func (s *Server) LoadBalance(ctx context.Context, req *querypb.LoadBalanceRequest) (*commonpb.Status, error) {
	log := log.Ctx(ctx).With(
		zap.Int64("collectionID", req.GetCollectionID()),
	)

	log.Info("load balance request received",
		zap.Int64s("source", req.GetSourceNodeIDs()),
		zap.Int64s("dest", req.GetDstNodeIDs()),
		zap.Int64s("segments", req.GetSealedSegmentIDs()))

	if s.status.Load() != commonpb.StateCode_Healthy {
		msg := "failed to load balance"
		log.Warn(msg, zap.Error(ErrNotHealthy))
		return utils.WrapStatus(commonpb.ErrorCode_UnexpectedError, msg, ErrNotHealthy), nil
	}

	// Verify request
	if len(req.GetSourceNodeIDs()) != 1 {
		msg := "source nodes can only contain 1 node"
		log.Warn(msg, zap.Int("source-nodes-num", len(req.GetSourceNodeIDs())))
		return utils.WrapStatus(commonpb.ErrorCode_UnexpectedError, msg), nil
	}
	if s.meta.CollectionManager.GetLoadPercentage(req.GetCollectionID()) < 100 {
		msg := "can't balance segments of a collection that is not fully loaded"
		log.Warn(msg)
		return utils.WrapStatus(commonpb.ErrorCode_UnexpectedError, msg), nil
	}
	srcNode := req.GetSourceNodeIDs()[0]
	replica := s.meta.ReplicaManager.GetByCollectionAndNode(req.GetCollectionID(), srcNode)
	if replica == nil {
		msg := "source node not found in any replica"
		log.Warn(msg)
		return utils.WrapStatus(commonpb.ErrorCode_UnexpectedError, msg), nil
	}
	if err := s.isStoppingNode(srcNode); err != nil {
		return utils.WrapStatus(commonpb.ErrorCode_UnexpectedError,
			fmt.Sprintf("can't balance, because the source node[%d] is invalid", srcNode), err), nil
	}
	for _, dstNode := range req.GetDstNodeIDs() {
		if !replica.Nodes.Contain(dstNode) {
			msg := "destination nodes have to be in the same replica as the source node"
			log.Warn(msg)
			return utils.WrapStatus(commonpb.ErrorCode_UnexpectedError, msg), nil
		}
		if err := s.isStoppingNode(dstNode); err != nil {
			return utils.WrapStatus(commonpb.ErrorCode_UnexpectedError,
				fmt.Sprintf("can't balance, because the destination node[%d] is invalid", dstNode), err), nil
		}
	}

	err := s.balanceSegments(ctx, req, replica)
	if err != nil {
		msg := "failed to balance segments"
		log.Warn(msg, zap.Error(err))
		return utils.WrapStatus(commonpb.ErrorCode_UnexpectedError, msg, err), nil
	}
	return successStatus, nil
}
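// ShowConfigurations returns the configurations of QueryCoord whose keys
// match req.Pattern.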
func (s *Server) ShowConfigurations(ctx context.Context, req *internalpb.ShowConfigurationsRequest) (*internalpb.ShowConfigurationsResponse, error) {
	log := log.Ctx(ctx)

	log.Info("show configurations request received", zap.String("pattern", req.GetPattern()))

	if s.status.Load() != commonpb.StateCode_Healthy {
		msg := "failed to show configurations"
		log.Warn(msg, zap.Error(ErrNotHealthy))
		return &internalpb.ShowConfigurationsResponse{
			Status: utils.WrapStatus(commonpb.ErrorCode_UnexpectedError, msg, ErrNotHealthy),
		}, nil
	}

	configList := make([]*commonpb.KeyValuePair, 0)
	for key, value := range Params.GetComponentConfigurations("querycoord", req.Pattern) {
		configList = append(configList, &commonpb.KeyValuePair{
			Key:   key,
			Value: value,
		})
	}

	return &internalpb.ShowConfigurationsResponse{
		Status: &commonpb.Status{
			ErrorCode: commonpb.ErrorCode_Success,
			Reason:    "",
		},
		Configuations: configList, // field name as defined in the proto
	}, nil
}

func (s *Server) GetMetrics(ctx context.Context, req *milvuspb.GetMetricsRequest) (*milvuspb.GetMetricsResponse, error) {
	log := log.Ctx(ctx)

	log.RatedDebug(60, "get metrics request received",
		zap.String("metricType", req.GetRequest()))

	if s.status.Load() != commonpb.StateCode_Healthy {
		msg := "failed to get metrics"
		log.Warn(msg, zap.Error(ErrNotHealthy))
		return &milvuspb.GetMetricsResponse{
			Status: utils.WrapStatus(commonpb.ErrorCode_UnexpectedError, msg, ErrNotHealthy),
		}, nil
	}

	resp := &milvuspb.GetMetricsResponse{
		Status: successStatus,
		ComponentName: metricsinfo.ConstructComponentName(typeutil.QueryCoordRole,
			paramtable.GetNodeID()),
	}

	metricType, err := metricsinfo.ParseMetricType(req.GetRequest())
	if err != nil {
		msg := "failed to parse metric type"
		log.Warn(msg, zap.Error(err))
		resp.Status = utils.WrapStatus(commonpb.ErrorCode_UnexpectedError, msg, err)
		return resp, nil
	}

	if metricType != metricsinfo.SystemInfoMetrics {
		msg := "invalid metric type"
		err := errors.New(metricsinfo.MsgUnimplementedMetric)
		log.Warn(msg, zap.Error(err))
		resp.Status = utils.WrapStatus(commonpb.ErrorCode_UnexpectedError, msg, err)
		return resp, nil
	}

	resp.Response, err = s.getSystemInfoMetrics(ctx, req)
	if err != nil {
		msg := "failed to get system info metrics"
		log.Warn(msg, zap.Error(err))
		resp.Status = utils.WrapStatus(commonpb.ErrorCode_UnexpectedError, msg, err)
		return resp, nil
	}

	return resp, nil
}

func (s *Server) GetReplicas(ctx context.Context, req *milvuspb.GetReplicasRequest) (*milvuspb.GetReplicasResponse, error) {
	log := log.Ctx(ctx).With(
		zap.Int64("collectionID", req.GetCollectionID()),
	)

	log.Info("get replicas request received", zap.Bool("with-shard-nodes", req.GetWithShardNodes()))

	if s.status.Load() != commonpb.StateCode_Healthy {
		msg := "failed to get replicas"
		log.Warn(msg, zap.Error(ErrNotHealthy))
		return &milvuspb.GetReplicasResponse{
			Status: utils.WrapStatus(commonpb.ErrorCode_UnexpectedError, msg, ErrNotHealthy),
		}, nil
	}

	resp := &milvuspb.GetReplicasResponse{
		Status:   successStatus,
		Replicas: make([]*milvuspb.ReplicaInfo, 0),
	}

	replicas := s.meta.ReplicaManager.GetByCollection(req.GetCollectionID())
	if len(replicas) == 0 {
		msg := "failed to get replicas, collection not loaded"
		log.Warn(msg)
		resp.Status = utils.WrapStatus(commonpb.ErrorCode_MetaFailed, msg)
		return resp, nil
	}

	for _, replica := range replicas {
		info, err := s.fillReplicaInfo(replica, req.GetWithShardNodes())
		if err != nil {
			msg := "failed to get replica info"
			log.Warn(msg,
				zap.Int64("replica", replica.GetID()),
				zap.Error(err))
			resp.Status = utils.WrapStatus(commonpb.ErrorCode_MetaFailed, msg, err)
			break // don't append a nil ReplicaInfo
		}
		resp.Replicas = append(resp.Replicas, info)
	}
	return resp, nil
}
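// GetShardLeaders returns the shard leaders of the given collection; a shard
// is included only if at least one replica serves it completely.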
func (s *Server) GetShardLeaders(ctx context.Context, req *querypb.GetShardLeadersRequest) (*querypb.GetShardLeadersResponse, error) {
	log := log.Ctx(ctx).With(
		zap.Int64("collectionID", req.GetCollectionID()),
	)

	log.Info("get shard leaders request received")

	if s.status.Load() != commonpb.StateCode_Healthy {
		msg := "failed to get shard leaders"
		log.Warn(msg, zap.Error(ErrNotHealthy))
		return &querypb.GetShardLeadersResponse{
			Status: utils.WrapStatus(commonpb.ErrorCode_UnexpectedError, msg, ErrNotHealthy),
		}, nil
	}

	resp := &querypb.GetShardLeadersResponse{
		Status: successStatus,
	}

	if s.meta.CollectionManager.GetLoadPercentage(req.GetCollectionID()) < 100 {
		msg := fmt.Sprintf("collection %v is not fully loaded", req.GetCollectionID())
		log.Warn(msg)
		resp.Status = utils.WrapStatus(commonpb.ErrorCode_NoReplicaAvailable, msg)
		return resp, nil
	}

	channels := s.targetMgr.GetDmChannelsByCollection(req.GetCollectionID(), meta.CurrentTarget)
	if len(channels) == 0 {
		msg := "failed to get channels"
		log.Warn(msg, zap.Error(meta.ErrCollectionNotFound))
		resp.Status = utils.WrapStatus(commonpb.ErrorCode_MetaFailed, msg, meta.ErrCollectionNotFound)
		return resp, nil
	}

	currentTargets := s.targetMgr.GetHistoricalSegmentsByCollection(req.GetCollectionID(), meta.CurrentTarget)
	for _, channel := range channels {
		log := log.With(zap.String("channel", channel.GetChannelName()))

		leaders := s.dist.LeaderViewManager.GetLeadersByShard(channel.GetChannelName())
		ids := make([]int64, 0, len(leaders))
		addrs := make([]string, 0, len(leaders))

		var channelErr error

		// In a replica, a shard is available if and only if:
		// 1. The leader is online
		// 2. All QueryNodes in the distribution are online
		// 3. The last heartbeat response time is within HeartbeatAvailableInterval for all QueryNodes (including the leader) in the distribution
		// 4. All segments of the shard in target should be in the distribution
		for _, leader := range leaders {
			log := log.With(zap.Int64("leaderID", leader.ID))
			info := s.nodeMgr.Get(leader.ID)

			// Check whether leader is online
			err := checkNodeAvailable(leader.ID, info)
			if err != nil {
				log.Info("leader is not available", zap.Error(err))
				multierr.AppendInto(&channelErr, fmt.Errorf("leader not available: %w", err))
				continue
			}
			// Check whether QueryNodes are online and available
			isAvailable := true
			for _, version := range leader.Segments {
				info := s.nodeMgr.Get(version.GetNodeID())
				err = checkNodeAvailable(version.GetNodeID(), info)
				if err != nil {
					log.Info("leader is not available due to QueryNode unavailable", zap.Error(err))
					isAvailable = false
					multierr.AppendInto(&channelErr, err)
					break
				}
			}

			// Avoid iterating all segments if any QueryNode is unavailable
			if !isAvailable {
				continue
			}

			// Check whether segments are fully loaded
			for segmentID, info := range currentTargets {
				if info.GetInsertChannel() != leader.Channel {
					continue
				}

				_, exist := leader.Segments[segmentID]
				if !exist {
					log.Info("leader is not available due to lack of segment", zap.Int64("segmentID", segmentID))
					multierr.AppendInto(&channelErr, WrapErrLackSegment(segmentID))
					isAvailable = false
					break
				}
			}
			if !isAvailable {
				continue
			}

			ids = append(ids, info.ID())
			addrs = append(addrs, info.Addr())
		}

		if len(ids) == 0 {
			msg := fmt.Sprintf("channel %s is not available in any replica", channel.GetChannelName())
			log.Warn(msg, zap.Error(channelErr))
			resp.Status = utils.WrapStatus(commonpb.ErrorCode_NoReplicaAvailable, msg, channelErr)
			resp.Shards = nil
			return resp, nil
		}

		resp.Shards = append(resp.Shards, &querypb.ShardLeadersList{
			ChannelName: channel.GetChannelName(),
			NodeIds:     ids,
			NodeAddrs:   addrs,
		})
	}
	return resp, nil
}
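// CheckHealth reports whether QueryCoord and every registered QueryNode are
// healthy, querying all nodes concurrently and collecting unhealthy reasons.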
func (s *Server) CheckHealth(ctx context.Context, req *milvuspb.CheckHealthRequest) (*milvuspb.CheckHealthResponse, error) {
	if s.status.Load() != commonpb.StateCode_Healthy {
		reason := errorutil.UnHealthReason("querycoord", s.session.ServerID, "querycoord is unhealthy")
		return &milvuspb.CheckHealthResponse{IsHealthy: false, Reasons: []string{reason}}, nil
	}

	group, ctx := errgroup.WithContext(ctx)
	errReasons := make([]string, 0, len(s.nodeMgr.GetAll()))

	mu := &sync.Mutex{}
	for _, node := range s.nodeMgr.GetAll() {
		node := node
		group.Go(func() error {
			resp, err := s.cluster.GetComponentStates(ctx, node.ID())
			isHealthy, reason := errorutil.UnHealthReasonWithComponentStatesOrErr("querynode", node.ID(), resp, err)
			if !isHealthy {
				mu.Lock()
				defer mu.Unlock()
				errReasons = append(errReasons, reason)
			}
			return err
		})
	}

	err := group.Wait()
	if err != nil || len(errReasons) != 0 {
		return &milvuspb.CheckHealthResponse{IsHealthy: false, Reasons: errReasons}, nil
	}

	return &milvuspb.CheckHealthResponse{IsHealthy: true, Reasons: errReasons}, nil
}