2022-10-11 11:39:22 +08:00
|
|
|
// Licensed to the LF AI & Data foundation under one
|
|
|
|
// or more contributor license agreements. See the NOTICE file
|
|
|
|
// distributed with this work for additional information
|
|
|
|
// regarding copyright ownership. The ASF licenses this file
|
|
|
|
// to you under the Apache License, Version 2.0 (the
|
|
|
|
// "License"); you may not use this file except in compliance
|
|
|
|
// with the License. You may obtain a copy of the License at
|
|
|
|
//
|
|
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
//
|
|
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
// See the License for the specific language governing permissions and
|
|
|
|
// limitations under the License.
|
|
|
|
|
2022-09-15 18:48:32 +08:00
|
|
|
package checkers
|
|
|
|
|
|
|
|
import (
|
|
|
|
"context"
|
2022-10-19 12:13:28 +08:00
|
|
|
"sync"
|
2022-09-15 18:48:32 +08:00
|
|
|
"time"
|
|
|
|
|
2023-11-24 18:08:24 +08:00
|
|
|
"github.com/cockroachdb/errors"
|
2023-09-21 09:45:27 +08:00
|
|
|
"go.uber.org/zap"
|
|
|
|
|
2022-09-15 18:48:32 +08:00
|
|
|
"github.com/milvus-io/milvus/internal/querycoordv2/balance"
|
|
|
|
"github.com/milvus-io/milvus/internal/querycoordv2/meta"
|
|
|
|
. "github.com/milvus-io/milvus/internal/querycoordv2/params"
|
|
|
|
"github.com/milvus-io/milvus/internal/querycoordv2/session"
|
|
|
|
"github.com/milvus-io/milvus/internal/querycoordv2/task"
|
2024-01-05 15:54:55 +08:00
|
|
|
"github.com/milvus-io/milvus/internal/querycoordv2/utils"
|
2023-04-06 19:14:32 +08:00
|
|
|
"github.com/milvus-io/milvus/pkg/log"
|
2022-09-15 18:48:32 +08:00
|
|
|
)
|
|
|
|
|
2024-01-05 15:54:55 +08:00
|
|
|
// errTypeNotFound is returned by Deactivate, Activate and IsActive when none
// of the registered checkers reports the requested checker type as its ID.
var errTypeNotFound = errors.New("checker type not found")
|
2023-10-27 01:08:12 +08:00
|
|
|
|
2024-05-21 14:29:39 +08:00
|
|
|
// GetBalancerFunc is a function that returns a balance.Balance.
// NewCheckerController hands it to the channel, segment and balance checkers
// instead of passing them a balancer value directly.
type GetBalancerFunc = func() balance.Balance
|
|
|
|
|
2022-09-15 18:48:32 +08:00
|
|
|
type CheckerController struct {
|
2023-09-27 16:27:27 +08:00
|
|
|
cancel context.CancelFunc
|
2024-01-05 15:54:55 +08:00
|
|
|
manualCheckChs map[utils.CheckerType]chan struct{}
|
2023-07-19 16:50:57 +08:00
|
|
|
meta *meta.Meta
|
|
|
|
dist *meta.DistributionManager
|
2024-07-01 10:26:06 +08:00
|
|
|
targetMgr meta.TargetManagerInterface
|
2023-07-19 21:22:58 +08:00
|
|
|
broker meta.Broker
|
2023-07-19 16:50:57 +08:00
|
|
|
nodeMgr *session.NodeManager
|
|
|
|
balancer balance.Balance
|
2022-09-15 18:48:32 +08:00
|
|
|
|
|
|
|
scheduler task.Scheduler
|
2024-01-05 15:54:55 +08:00
|
|
|
checkers map[utils.CheckerType]Checker
|
2022-10-19 12:13:28 +08:00
|
|
|
|
|
|
|
stopOnce sync.Once
|
2022-09-15 18:48:32 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
func NewCheckerController(
|
|
|
|
meta *meta.Meta,
|
|
|
|
dist *meta.DistributionManager,
|
2024-07-01 10:26:06 +08:00
|
|
|
targetMgr meta.TargetManagerInterface,
|
2023-04-03 14:16:25 +08:00
|
|
|
nodeMgr *session.NodeManager,
|
2023-07-19 21:22:58 +08:00
|
|
|
scheduler task.Scheduler,
|
|
|
|
broker meta.Broker,
|
2024-05-21 14:29:39 +08:00
|
|
|
getBalancerFunc GetBalancerFunc,
|
2023-07-19 21:22:58 +08:00
|
|
|
) *CheckerController {
|
2022-09-15 18:48:32 +08:00
|
|
|
// CheckerController runs checkers with the order,
|
|
|
|
// the former checker has higher priority
|
2024-01-05 15:54:55 +08:00
|
|
|
checkers := map[utils.CheckerType]Checker{
|
2024-05-21 14:29:39 +08:00
|
|
|
utils.ChannelChecker: NewChannelChecker(meta, dist, targetMgr, nodeMgr, getBalancerFunc),
|
|
|
|
utils.SegmentChecker: NewSegmentChecker(meta, dist, targetMgr, nodeMgr, getBalancerFunc),
|
|
|
|
utils.BalanceChecker: NewBalanceChecker(meta, targetMgr, nodeMgr, scheduler, getBalancerFunc),
|
2024-07-01 10:26:06 +08:00
|
|
|
utils.IndexChecker: NewIndexChecker(meta, dist, broker, nodeMgr, targetMgr),
|
2024-06-10 21:34:08 +08:00
|
|
|
// todo temporary work around must fix
|
|
|
|
// utils.LeaderChecker: NewLeaderChecker(meta, dist, targetMgr, nodeMgr, true),
|
2024-06-11 14:21:56 +08:00
|
|
|
utils.LeaderChecker: NewLeaderChecker(meta, dist, targetMgr, nodeMgr),
|
2022-09-15 18:48:32 +08:00
|
|
|
}
|
2023-07-19 16:50:57 +08:00
|
|
|
|
2024-01-05 15:54:55 +08:00
|
|
|
manualCheckChs := map[utils.CheckerType]chan struct{}{
|
|
|
|
utils.ChannelChecker: make(chan struct{}, 1),
|
|
|
|
utils.SegmentChecker: make(chan struct{}, 1),
|
|
|
|
utils.BalanceChecker: make(chan struct{}, 1),
|
2022-09-15 18:48:32 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
return &CheckerController{
|
2023-07-19 16:50:57 +08:00
|
|
|
manualCheckChs: manualCheckChs,
|
|
|
|
meta: meta,
|
|
|
|
dist: dist,
|
|
|
|
targetMgr: targetMgr,
|
|
|
|
scheduler: scheduler,
|
|
|
|
checkers: checkers,
|
2023-07-19 21:22:58 +08:00
|
|
|
broker: broker,
|
2022-09-15 18:48:32 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-09-27 16:27:27 +08:00
|
|
|
func (controller *CheckerController) Start() {
|
|
|
|
ctx, cancel := context.WithCancel(context.Background())
|
|
|
|
controller.cancel = cancel
|
|
|
|
|
2023-10-27 01:08:12 +08:00
|
|
|
for checker := range controller.checkers {
|
|
|
|
go controller.startChecker(ctx, checker)
|
2023-07-19 16:50:57 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2024-01-05 15:54:55 +08:00
|
|
|
func getCheckerInterval(checker utils.CheckerType) time.Duration {
|
2023-10-27 01:08:12 +08:00
|
|
|
switch checker {
|
2024-01-05 15:54:55 +08:00
|
|
|
case utils.SegmentChecker:
|
2023-07-19 16:50:57 +08:00
|
|
|
return Params.QueryCoordCfg.SegmentCheckInterval.GetAsDuration(time.Millisecond)
|
2024-01-05 15:54:55 +08:00
|
|
|
case utils.ChannelChecker:
|
2023-07-19 16:50:57 +08:00
|
|
|
return Params.QueryCoordCfg.ChannelCheckInterval.GetAsDuration(time.Millisecond)
|
2024-01-05 15:54:55 +08:00
|
|
|
case utils.BalanceChecker:
|
2023-07-19 16:50:57 +08:00
|
|
|
return Params.QueryCoordCfg.BalanceCheckInterval.GetAsDuration(time.Millisecond)
|
2024-01-05 15:54:55 +08:00
|
|
|
case utils.IndexChecker:
|
2023-07-19 21:22:58 +08:00
|
|
|
return Params.QueryCoordCfg.IndexCheckInterval.GetAsDuration(time.Millisecond)
|
2024-01-05 15:54:55 +08:00
|
|
|
case utils.LeaderChecker:
|
2024-01-17 19:40:53 +08:00
|
|
|
return Params.QueryCoordCfg.LeaderViewUpdateInterval.GetAsDuration(time.Second)
|
2023-07-19 16:50:57 +08:00
|
|
|
default:
|
|
|
|
return Params.QueryCoordCfg.CheckInterval.GetAsDuration(time.Millisecond)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2024-01-05 15:54:55 +08:00
|
|
|
func (controller *CheckerController) startChecker(ctx context.Context, checker utils.CheckerType) {
|
2023-10-27 01:08:12 +08:00
|
|
|
interval := getCheckerInterval(checker)
|
2023-07-19 16:50:57 +08:00
|
|
|
ticker := time.NewTicker(interval)
|
|
|
|
defer ticker.Stop()
|
|
|
|
|
|
|
|
for {
|
|
|
|
select {
|
|
|
|
case <-ctx.Done():
|
|
|
|
log.Info("Checker stopped",
|
2023-10-27 01:08:12 +08:00
|
|
|
zap.String("type", checker.String()))
|
2023-07-19 16:50:57 +08:00
|
|
|
return
|
|
|
|
|
|
|
|
case <-ticker.C:
|
2023-10-27 01:08:12 +08:00
|
|
|
controller.check(ctx, checker)
|
2023-07-19 16:50:57 +08:00
|
|
|
|
2023-10-27 01:08:12 +08:00
|
|
|
case <-controller.manualCheckChs[checker]:
|
2023-07-19 16:50:57 +08:00
|
|
|
ticker.Stop()
|
2023-10-27 01:08:12 +08:00
|
|
|
controller.check(ctx, checker)
|
2023-09-09 08:29:16 +08:00
|
|
|
ticker.Reset(interval)
|
2022-09-15 18:48:32 +08:00
|
|
|
}
|
2023-07-19 16:50:57 +08:00
|
|
|
}
|
2022-09-15 18:48:32 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
func (controller *CheckerController) Stop() {
|
2022-10-19 12:13:28 +08:00
|
|
|
controller.stopOnce.Do(func() {
|
2023-09-27 16:27:27 +08:00
|
|
|
if controller.cancel != nil {
|
|
|
|
controller.cancel()
|
|
|
|
}
|
2022-10-19 12:13:28 +08:00
|
|
|
})
|
2022-09-15 18:48:32 +08:00
|
|
|
}
|
|
|
|
|
2022-12-06 22:59:19 +08:00
|
|
|
func (controller *CheckerController) Check() {
|
2023-07-19 16:50:57 +08:00
|
|
|
for _, checkCh := range controller.manualCheckChs {
|
|
|
|
select {
|
|
|
|
case checkCh <- struct{}{}:
|
|
|
|
default:
|
|
|
|
}
|
2023-05-08 14:06:41 +08:00
|
|
|
}
|
2022-12-06 22:59:19 +08:00
|
|
|
}
|
|
|
|
|
2022-09-15 18:48:32 +08:00
|
|
|
// check is the real implementation of Check
|
2024-01-05 15:54:55 +08:00
|
|
|
func (controller *CheckerController) check(ctx context.Context, checkType utils.CheckerType) {
|
2023-10-27 01:08:12 +08:00
|
|
|
checker := controller.checkers[checkType]
|
2023-07-19 16:50:57 +08:00
|
|
|
tasks := checker.Check(ctx)
|
2022-09-15 18:48:32 +08:00
|
|
|
|
|
|
|
for _, task := range tasks {
|
2022-09-28 19:58:54 +08:00
|
|
|
err := controller.scheduler.Add(task)
|
|
|
|
if err != nil {
|
2023-03-16 17:43:55 +08:00
|
|
|
task.Cancel(err)
|
2022-09-28 19:58:54 +08:00
|
|
|
continue
|
|
|
|
}
|
2022-09-15 18:48:32 +08:00
|
|
|
}
|
|
|
|
}
|
2023-11-24 18:08:24 +08:00
|
|
|
|
2024-01-05 15:54:55 +08:00
|
|
|
func (controller *CheckerController) Deactivate(typ utils.CheckerType) error {
|
2023-11-24 18:08:24 +08:00
|
|
|
for _, checker := range controller.checkers {
|
|
|
|
if checker.ID() == typ {
|
|
|
|
checker.Deactivate()
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return errTypeNotFound
|
|
|
|
}
|
|
|
|
|
2024-01-05 15:54:55 +08:00
|
|
|
func (controller *CheckerController) Activate(typ utils.CheckerType) error {
|
2023-11-24 18:08:24 +08:00
|
|
|
for _, checker := range controller.checkers {
|
|
|
|
if checker.ID() == typ {
|
|
|
|
checker.Activate()
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return errTypeNotFound
|
|
|
|
}
|
|
|
|
|
2024-01-05 15:54:55 +08:00
|
|
|
func (controller *CheckerController) IsActive(typ utils.CheckerType) (bool, error) {
|
2023-11-24 18:08:24 +08:00
|
|
|
for _, checker := range controller.checkers {
|
|
|
|
if checker.ID() == typ {
|
|
|
|
return checker.IsActive(), nil
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return false, errTypeNotFound
|
|
|
|
}
|
2023-12-04 17:38:37 +08:00
|
|
|
|
|
|
|
func (controller *CheckerController) Checkers() []Checker {
|
|
|
|
checkers := make([]Checker, 0, len(controller.checkers))
|
|
|
|
for _, checker := range controller.checkers {
|
|
|
|
checkers = append(checkers, checker)
|
|
|
|
}
|
|
|
|
return checkers
|
|
|
|
}
|