milvus/internal/datanode/channel_manager.go

506 lines
13 KiB
Go
Raw Normal View History

// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package datanode
import (
"context"
"sync"
"time"
"github.com/cockroachdb/errors"
"go.uber.org/atomic"
"go.uber.org/zap"
"github.com/milvus-io/milvus/internal/proto/datapb"
"github.com/milvus-io/milvus/pkg/log"
"github.com/milvus-io/milvus/pkg/util/merr"
"github.com/milvus-io/milvus/pkg/util/typeutil"
)
// releaseFunc is the callback used to release the flowgraph of the given
// virtual channel.
type releaseFunc func(channel string)
// ChannelManager accepts channel watch/release operations, dispatches them to
// one opRunner per channel, and tracks the flowgraphs that are running.
type ChannelManager struct {
	// mu is read-locked by GetProgress and write-locked by handleOpState.
	mu sync.RWMutex
	dn *DataNode

	// communicateCh carries operation results from the runners to the
	// manager loop launched by Start.
	communicateCh     chan *opState
	runningFlowgraphs *flowgraphManager

	opRunners *typeutil.ConcurrentMap[string, *opRunner] // channel -> runner
	abnormals *typeutil.ConcurrentMap[int64, string]     // OpID -> Channel

	// releaseFunc is handed to every runner created by the manager.
	releaseFunc releaseFunc

	// closeCh is closed exactly once (guarded by closeOnce) to stop the
	// manager loop; closeWaiter tracks the goroutines launched by Start.
	closeCh     chan struct{}
	closeOnce   sync.Once
	closeWaiter sync.WaitGroup
}
// NewChannelManager builds a ChannelManager bound to dn. The manager owns a
// fresh flowgraph manager whose release method is used as the release
// callback of every runner. Call Start to begin processing results.
func NewChannelManager(dn *DataNode) *ChannelManager {
	flowgraphs := newFlowgraphManager()

	return &ChannelManager{
		dn:                dn,
		runningFlowgraphs: flowgraphs,
		releaseFunc:       flowgraphs.release,

		communicateCh: make(chan *opState, 100),
		closeCh:       make(chan struct{}),

		opRunners: typeutil.NewConcurrentMap[string, *opRunner](),
		abnormals: typeutil.NewConcurrentMap[int64, string](),
	}
}
// Submit queues the operation described by info on the runner of its virtual
// channel, creating and starting that runner if it does not exist yet.
// It returns the error reported by the runner's Enqueue.
func (m *ChannelManager) Submit(info *datapb.ChannelWatchInfo) error {
	return m.getOrCreateRunner(info.GetVchan().GetChannelName()).Enqueue(info)
}
// GetProgress reports the current state of the operation described by info.
//
// For a ToWatch operation the state is WatchSuccess when a flowgraph with the
// same channel and opID is running, ToWatch when the channel's runner still
// tracks the opID, and WatchFailure otherwise.
//
// For a ToRelease operation the state is ReleaseSuccess when no flowgraph is
// running for the channel, ToRelease when the channel's runner still tracks
// the opID, and ReleaseFailure otherwise.
//
// Any other requested state yields a response whose Status carries a
// parameter-invalid error; its State is left unset.
func (m *ChannelManager) GetProgress(info *datapb.ChannelWatchInfo) *datapb.ChannelOperationProgressResponse {
	m.mu.RLock()
	defer m.mu.RUnlock()

	var (
		opID    = info.GetOpID()
		channel = info.GetVchan().GetChannelName()
		resp    = &datapb.ChannelOperationProgressResponse{
			Status: merr.Success(),
			OpID:   opID,
		}
	)

	switch requested := info.GetState(); requested {
	case datapb.ChannelWatchState_ToWatch:
		// Start from failure and upgrade when evidence of progress exists.
		resp.State = datapb.ChannelWatchState_WatchFailure
		if m.runningFlowgraphs.existWithOpID(channel, opID) {
			resp.State = datapb.ChannelWatchState_WatchSuccess
		} else if runner, found := m.opRunners.Get(channel); found && runner.Exist(opID) {
			resp.State = datapb.ChannelWatchState_ToWatch
		}

	case datapb.ChannelWatchState_ToRelease:
		resp.State = datapb.ChannelWatchState_ReleaseFailure
		if !m.runningFlowgraphs.exist(channel) {
			resp.State = datapb.ChannelWatchState_ReleaseSuccess
		} else if runner, found := m.opRunners.Get(channel); found && runner.Exist(opID) {
			resp.State = datapb.ChannelWatchState_ToRelease
		}

	default:
		err := merr.WrapErrParameterInvalid("ToWatch or ToRelease", requested.String())
		log.Warn("fail to get progress", zap.Error(err))
		resp.Status = merr.Status(err)
	}
	return resp
}
// Close shuts the manager down: it closes every runner, closes the running
// flowgraphs, signals the manager loop to exit and then waits for the
// goroutines launched by Start. Only the first call has any effect.
func (m *ChannelManager) Close() {
	shutdown := func() {
		closeRunner := func(_ string, runner *opRunner) bool {
			runner.Close()
			return true // keep iterating over all runners
		}
		m.opRunners.Range(closeRunner)

		m.runningFlowgraphs.close()
		close(m.closeCh)
		m.closeWaiter.Wait()
	}
	m.closeOnce.Do(shutdown)
}
// Start launches two background goroutines registered on closeWaiter: the
// flowgraph manager's start routine (which receives the wait group) and the
// manager loop that handles operation results until closeCh is closed.
func (m *ChannelManager) Start() {
	m.closeWaiter.Add(2)

	go m.runningFlowgraphs.start(&m.closeWaiter)

	loop := func() {
		defer m.closeWaiter.Done()
		log.Info("DataNode ChannelManager start")
		for {
			select {
			case <-m.closeCh:
				log.Info("DataNode ChannelManager exit")
				return
			case result := <-m.communicateCh:
				m.handleOpState(result)
			}
		}
	}
	go loop()
}
// handleOpState applies the result of a finished operation under the write
// lock:
//   - WatchSuccess: registers the flowgraph, then marks the op finished.
//   - WatchFailure: marks the op finished.
//   - ReleaseSuccess: marks the op finished and destroys the channel's runner.
//   - ReleaseFailure: records the channel as abnormal, marks the op finished
//     and destroys the channel's runner.
//
// Results in any other state are ignored.
func (m *ChannelManager) handleOpState(opState *opState) {
	m.mu.Lock()
	defer m.mu.Unlock()

	opID, channel := opState.opID, opState.channel
	log := log.With(
		zap.Int64("opID", opID),
		zap.String("channel", channel),
		zap.String("State", opState.state.String()),
	)

	switch st := opState.state; st {
	case datapb.ChannelWatchState_WatchSuccess, datapb.ChannelWatchState_WatchFailure:
		if st == datapb.ChannelWatchState_WatchSuccess {
			log.Info("Success to watch")
			m.runningFlowgraphs.Add(opState.fg)
		} else {
			log.Info("Fail to watch")
		}
		m.finishOp(opID, channel)

	case datapb.ChannelWatchState_ReleaseSuccess, datapb.ChannelWatchState_ReleaseFailure:
		if st == datapb.ChannelWatchState_ReleaseFailure {
			log.Info("Fail to release, add channel to abnormal lists")
			m.abnormals.Insert(opID, channel)
		} else {
			log.Info("Success to release")
		}
		// Either way the runner of a released channel is torn down.
		m.finishOp(opID, channel)
		m.destoryRunner(channel)
	}
}
// getOrCreateRunner returns the runner registered for channel, creating,
// registering and starting a new one when none exists.
func (m *ChannelManager) getOrCreateRunner(channel string) *opRunner {
	// Fast path: avoid allocating a throwaway runner (queue channel + map)
	// on every call when the channel already has one.
	if runner, ok := m.opRunners.Get(channel); ok {
		return runner
	}

	// GetOrInsert still arbitrates concurrent creators: only the caller whose
	// runner was actually inserted starts it.
	runner, loaded := m.opRunners.GetOrInsert(channel, NewOpRunner(channel, m.dn, m.releaseFunc, m.communicateCh))
	if !loaded {
		runner.Start()
	}
	return runner
}
// destoryRunner unregisters the runner of channel, if any, and closes it.
func (m *ChannelManager) destoryRunner(channel string) {
	runner, found := m.opRunners.GetAndRemove(channel)
	if !found {
		return
	}
	runner.Close()
}
// finishOp tells the runner of channel, if one is registered, that opID is
// done so it stops tracking it.
func (m *ChannelManager) finishOp(opID int64, channel string) {
	runner, found := m.opRunners.Get(channel)
	if !found {
		return
	}
	runner.FinishOp(opID)
}
// opInfo is the per-operation bookkeeping kept by an opRunner.
type opInfo struct {
	// tickler stays nil until a watch for the operation starts executing.
	tickler *tickler
}
// opRunner executes the queued watch/release operations of a single channel
// one at a time and reports each result on resultCh.
type opRunner struct {
	channel     string
	dn          *DataNode
	releaseFunc releaseFunc

	// guard protects allOps.
	guard  sync.RWMutex
	allOps map[UniqueID]*opInfo // opID -> tickler

	opsInQueue chan *datapb.ChannelWatchInfo // pending operations
	resultCh   chan *opState                 // finished operation results

	// closeCh is closed once (guarded by closeOnce) to stop the worker
	// goroutine; closeWg waits for that goroutine to exit.
	closeWg   sync.WaitGroup
	closeOnce sync.Once
	closeCh   chan struct{}
}
// NewOpRunner creates a runner for channel. Operations are released through
// f and results are delivered on resultCh. The runner does nothing until
// Start is called.
func NewOpRunner(channel string, dn *DataNode, f releaseFunc, resultCh chan *opState) *opRunner {
	runner := new(opRunner)

	runner.channel = channel
	runner.dn = dn
	runner.releaseFunc = f
	runner.resultCh = resultCh

	runner.allOps = make(map[UniqueID]*opInfo)
	runner.opsInQueue = make(chan *datapb.ChannelWatchInfo, 10)
	runner.closeCh = make(chan struct{})

	return runner
}
// Start launches the worker goroutine, which executes queued operations
// sequentially and publishes each result until closeCh is closed.
func (r *opRunner) Start() {
	r.closeWg.Add(1)

	worker := func() {
		defer r.closeWg.Done()
		for {
			select {
			case <-r.closeCh:
				return
			case op := <-r.opsInQueue:
				result := r.Execute(op)
				r.NotifyState(result)
			}
		}
	}
	go worker()
}
// FinishOp stops tracking opID. Unknown IDs are ignored.
func (r *opRunner) FinishOp(opID UniqueID) {
	r.guard.Lock()
	delete(r.allOps, opID)
	r.guard.Unlock()
}
// Exist reports whether opID is still tracked by the runner, i.e. it was
// enqueued and has not been finished yet.
func (r *opRunner) Exist(opID UniqueID) bool {
	r.guard.RLock()
	_, tracked := r.allOps[opID]
	r.guard.RUnlock()
	return tracked
}
// Enqueue queues info for execution and starts tracking its opID.
// Only ToWatch and ToRelease operations are accepted; any other state yields
// an error. An opID that is already tracked is silently ignored.
//
// The send on the queue happens while the guard is held, so Enqueue blocks
// (holding the lock) when the queue is full.
func (r *opRunner) Enqueue(info *datapb.ChannelWatchInfo) error {
	switch info.GetState() {
	case datapb.ChannelWatchState_ToWatch, datapb.ChannelWatchState_ToRelease:
		// supported operation
	default:
		return errors.New("Invalid channel watch state")
	}

	opID := info.GetOpID()

	r.guard.Lock()
	defer r.guard.Unlock()

	if _, tracked := r.allOps[opID]; tracked {
		return nil
	}
	r.opsInQueue <- info
	r.allOps[opID] = &opInfo{}
	return nil
}
// UnfinishedOpSize returns the number of operations the runner still tracks.
func (r *opRunner) UnfinishedOpSize() int {
	r.guard.RLock()
	pending := len(r.allOps)
	r.guard.RUnlock()
	return pending
}
// Execute runs a single channel operation and returns its resulting state.
// The operation state is validated during Enqueue, so anything that is not
// ToWatch is treated as ToRelease here.
func (r *opRunner) Execute(info *datapb.ChannelWatchInfo) *opState {
	var (
		channel = info.GetVchan().GetChannelName()
		opID    = info.GetOpID()
		state   = info.GetState()
	)
	log.Info("Start to execute channel operation",
		zap.String("channel", channel),
		zap.Int64("opID", opID),
		zap.String("state", state.String()),
	)

	if state != datapb.ChannelWatchState_ToWatch {
		// ToRelease state
		return releaseWithTimer(r.releaseFunc, channel, opID)
	}
	return r.watchWithTimer(info)
}
// watchWithTimer executes the ToWatch operation described by info and blocks
// until it has finished or timed out.
//
// It returns an opState whose state is WatchSuccess (with fg set to the
// started data sync service) when executeWatch succeeds, and WatchFailure
// when executeWatch fails, when the watch times out, or when the operation is
// no longer tracked by the runner.
//
// The watchdog timer is re-armed with WatchTimeoutInterval every time the
// tickler reports progress; when it fires the tickler is closed and the
// watch context is cancelled.
// NOTE(review): the watch context itself is created with a fixed
// WatchTimeoutInterval deadline, so progress signals do not extend the
// context's lifetime — confirm this is intended.
func (r *opRunner) watchWithTimer(info *datapb.ChannelWatchInfo) *opState {
	result := &opState{
		channel: info.GetVchan().GetChannelName(),
		opID:    info.GetOpID(),
	}
	log := log.With(zap.String("channel", result.channel), zap.Int64("opID", result.opID))

	r.guard.Lock()
	op, ok := r.allOps[info.GetOpID()]
	if !ok {
		// The lock must be released on this path too; returning with it held
		// would block every later Enqueue/FinishOp/Exist/Close on the runner.
		r.guard.Unlock()
		log.Warn("operation is not tracked by the runner, skip watch")
		result.state = datapb.ChannelWatchState_WatchFailure
		return result
	}
	tk := newTickler()
	op.tickler = tk
	r.guard.Unlock()

	var (
		// Both signals are buffered so the watch goroutine never blocks on
		// them, even if the timer goroutine already exited on timeout.
		successSig = make(chan struct{}, 1)
		failureSig = make(chan struct{}, 1)
		waiter     sync.WaitGroup
	)

	watchTimeout := Params.DataCoordCfg.WatchTimeoutInterval.GetAsDuration(time.Second)
	ctx, cancel := context.WithTimeout(context.Background(), watchTimeout)
	defer cancel()

	startTimer := func(wg *sync.WaitGroup) {
		defer wg.Done()

		timer := time.NewTimer(watchTimeout)
		defer timer.Stop()

		log.Info("Start timer for ToWatch operation", zap.Duration("timeout", watchTimeout))
		for {
			select {
			case <-timer.C:
				// watch timeout
				tk.close()
				cancel()
				log.Info("Stop timer for ToWatch operation timeout", zap.Duration("timeout", watchTimeout))
				return

			case <-tk.progressSig:
				timer.Reset(watchTimeout)

			case <-successSig:
				// watch success
				log.Info("Stop timer for ToWatch operation succeeded", zap.Duration("timeout", watchTimeout))
				return

			case <-failureSig:
				// watch failed before the timeout: stop right away instead of
				// holding the runner until the timer expires.
				log.Info("Stop timer for ToWatch operation failed", zap.Duration("timeout", watchTimeout))
				return
			}
		}
	}

	waiter.Add(2)
	go startTimer(&waiter)
	go func() {
		defer waiter.Done()
		fg, err := executeWatch(ctx, r.dn, info, tk)
		if err != nil {
			log.Warn("failed to watch channel", zap.Error(err))
			result.state = datapb.ChannelWatchState_WatchFailure
			failureSig <- struct{}{}
			return
		}
		result.state = datapb.ChannelWatchState_WatchSuccess
		result.fg = fg
		successSig <- struct{}{}
	}()

	// result is only read after both goroutines are done.
	waiter.Wait()
	return result
}
// releaseWithTimer invokes releaseFunc for channel and waits for it to finish
// for at most WatchTimeoutInterval. The returned opState is ReleaseSuccess
// when the release completed in time and ReleaseFailure when the timer fired
// first.
func releaseWithTimer(releaseFunc releaseFunc, channel string, opID UniqueID) *opState {
	result := &opState{
		channel: channel,
		opID:    opID,
	}
	log := log.With(zap.String("channel", channel))

	// Buffered so a release finishing after the timeout never blocks on it.
	released := make(chan struct{}, 1)

	var timerDone sync.WaitGroup
	timerDone.Add(1)

	// Timer goroutine: decides the final state, whichever event comes first.
	go func() {
		defer timerDone.Done()

		releaseTimeout := Params.DataCoordCfg.WatchTimeoutInterval.GetAsDuration(time.Second)
		timer := time.NewTimer(releaseTimeout)
		defer timer.Stop()

		log.Info("Start timer for ToRelease operation", zap.Duration("timeout", releaseTimeout))
		select {
		case <-released:
			log.Info("Stop timer for ToRelease operation succeeded", zap.Duration("timeout", releaseTimeout))
			result.state = datapb.ChannelWatchState_ReleaseSuccess
		case <-timer.C:
			log.Info("Stop timer for ToRelease operation timeout", zap.Duration("timeout", releaseTimeout))
			result.state = datapb.ChannelWatchState_ReleaseFailure
		}
	}()

	// Release goroutine: deliberately not waited for.
	go func() {
		// TODO: failure should panic this DN, but we're not sure how
		//   to recover when releaseFunc is stuck.
		// Whenever we see it stuck, it's a bug that needs to be fixed.
		// Because the behavior after a stuck release is unknown, the channel
		// is marked abnormal in this DN. This goroutine might never return.
		//
		// The channel can still be balanced into other DNs, but not on this one.
		// ExclusiveConsumer error happens when the same DN subscribes the same pchannel twice.
		releaseFunc(result.channel)
		released <- struct{}{}
	}()

	timerDone.Wait()
	return result
}
// NotifyState publishes state on the runner's result channel. The send
// blocks until the channel has room for it.
func (r *opRunner) NotifyState(state *opState) {
	r.resultCh <- state
}
// Close closes the tickler of every tracked operation that has one, then
// stops the worker goroutine and waits for it to exit. The tickler sweep runs
// on every call; stopping the worker happens only once.
func (r *opRunner) Close() {
	func() {
		r.guard.Lock()
		defer r.guard.Unlock()

		for _, op := range r.allOps {
			if op.tickler == nil {
				continue
			}
			op.tickler.close()
		}
	}()

	stopWorker := func() {
		close(r.closeCh)
		r.closeWg.Wait()
	}
	r.closeOnce.Do(stopWorker)
}
// opState is the result of an executed channel operation, sent from a runner
// to the ChannelManager.
type opState struct {
	channel string
	opID    int64
	state   datapb.ChannelWatchState
	// fg is set only when a watch succeeded.
	fg *dataSyncService
}
// executeWatch creates the data sync service for the channel described by
// info and starts it. It returns the started service, or the creation error
// with a nil service.
// It is expected to always return (success or failure) rather than get stuck.
func executeWatch(ctx context.Context, dn *DataNode, info *datapb.ChannelWatchInfo, tickler *tickler) (*dataSyncService, error) {
	svc, err := newDataSyncService(ctx, dn, info, tickler)
	if err == nil {
		svc.start()
		return svc, nil
	}
	return nil, err
}
// tickler counts every call to inc() and emits a progress signal for each of
// them; it also carries a closed flag that can be set and queried.
type tickler struct {
	count *atomic.Int32 // number of inc() calls so far
	total *atomic.Int32 // expected number of inc() calls; 0 means unset

	closedSig   *atomic.Bool
	progressSig chan struct{} // receives one element per inc() call
}
// inc bumps the counter by one and then emits a progress signal.
// The send blocks when progressSig has no free buffer slot.
func (t *tickler) inc() {
	t.count.Inc()
	t.progressSig <- struct{}{}
}
// setTotal records the expected number of inc() calls, which progress() uses
// as the denominator.
func (t *tickler) setTotal(total int32) {
	t.total.Store(total)
}
// progress returns the completed percentage (count * 100 / total, truncated)
// when a total is set; otherwise it returns the raw count.
func (t *tickler) progress() int32 {
	// Load each counter once so the zero check and the division agree.
	total := t.total.Load()
	count := t.count.Load()
	if total == 0 {
		return count
	}
	// Multiply before dividing: with integer arithmetic count/total is 0
	// until count reaches total, which made the old formula report only 0
	// or 100. The int64 intermediate keeps count*100 from overflowing int32.
	return int32(int64(count) * 100 / int64(total))
}
// close sets the closed flag; calling it again is a no-op.
func (t *tickler) close() {
	t.closedSig.CompareAndSwap(false, true)
}
// closed reports whether close() has been called.
func (t *tickler) closed() bool {
	return t.closedSig.Load()
}
// newTickler returns an open tickler with zeroed counters and a progress
// channel that buffers up to 200 signals.
func newTickler() *tickler {
	tk := new(tickler)
	tk.count = atomic.NewInt32(0)
	tk.total = atomic.NewInt32(0)
	tk.closedSig = atomic.NewBool(false)
	tk.progressSig = make(chan struct{}, 200)
	return tk
}