enhance: Rootcoord's stop may block in quota_center's stop (#31447)

this PR fixed that rootcoord's stop may block in quota_center' stop

---------

Signed-off-by: Wei Liu <wei.liu@zilliz.com>
This commit is contained in:
wei liu 2024-04-01 21:09:18 +08:00 committed by GitHub
parent 4e264003bf
commit 9111ecab82
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 60 additions and 5 deletions

View File

@ -88,6 +88,9 @@ type collectionStates = map[milvuspb.QuotaState]commonpb.ErrorCode
//
// If necessary, user can also manually force to deny RW requests.
type QuotaCenter struct {
ctx context.Context
cancel context.CancelFunc
// clients
proxies proxyutil.ProxyClientManagerInterface
queryCoord types.QueryCoordClient
@ -113,11 +116,16 @@ type QuotaCenter struct {
stopOnce sync.Once
stopChan chan struct{}
wg sync.WaitGroup
}
// NewQuotaCenter returns a new QuotaCenter.
func NewQuotaCenter(proxies proxyutil.ProxyClientManagerInterface, queryCoord types.QueryCoordClient, dataCoord types.DataCoordClient, tsoAllocator tso.Allocator, meta IMetaTable) *QuotaCenter {
ctx, cancel := context.WithCancel(context.TODO())
return &QuotaCenter{
ctx: ctx,
cancel: cancel,
proxies: proxies,
queryCoord: queryCoord,
dataCoord: dataCoord,
@ -133,8 +141,15 @@ func NewQuotaCenter(proxies proxyutil.ProxyClientManagerInterface, queryCoord ty
}
}
func (q *QuotaCenter) Start() {
q.wg.Add(1)
go q.run()
}
// run starts the service of QuotaCenter.
func (q *QuotaCenter) run() {
defer q.wg.Done()
interval := time.Duration(Params.QuotaConfig.QuotaCenterCollectInterval.GetAsFloat() * float64(time.Second))
log.Info("Start QuotaCenter", zap.Duration("collectInterval", interval))
ticker := time.NewTicker(interval)
@ -166,9 +181,13 @@ func (q *QuotaCenter) run() {
// stop would stop the service of QuotaCenter.
func (q *QuotaCenter) stop() {
log.Info("stop quota center")
q.stopOnce.Do(func() {
q.stopChan <- struct{}{}
// cancel all blocking request to coord
q.cancel()
close(q.stopChan)
})
q.wg.Wait()
}
// clearMetrics removes all metrics stored in QuotaCenter.
@ -236,7 +255,7 @@ func (q *QuotaCenter) syncMetrics() error {
oldDataNodes := typeutil.NewSet(lo.Keys(q.dataNodeMetrics)...)
oldQueryNodes := typeutil.NewSet(lo.Keys(q.queryNodeMetrics)...)
q.clearMetrics()
ctx, cancel := context.WithTimeout(context.Background(), GetMetricsTimeout)
ctx, cancel := context.WithTimeout(q.ctx, GetMetricsTimeout)
defer cancel()
group := &errgroup.Group{}
@ -862,7 +881,7 @@ func (q *QuotaCenter) checkDiskQuota() {
// setRates notifies Proxies to set rates for different rate types.
func (q *QuotaCenter) setRates() error {
ctx, cancel := context.WithTimeout(context.Background(), SetRatesTimeout)
ctx, cancel := context.WithTimeout(q.ctx, SetRatesTimeout)
defer cancel()
toCollectionRate := func(collection int64, currentRates map[internalpb.RateType]ratelimitutil.Limit) *proxypb.CollectionRate {

View File

@ -28,6 +28,7 @@ import (
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/mock"
"github.com/stretchr/testify/suite"
"google.golang.org/grpc"
"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
"github.com/milvus-io/milvus-proto/go-api/v2/milvuspb"
@ -67,11 +68,46 @@ func TestQuotaCenter(t *testing.T) {
meta.EXPECT().GetCollectionByID(mock.Anything, mock.Anything, mock.Anything, mock.Anything, mock.Anything).Return(nil, merr.ErrCollectionNotFound).Maybe()
quotaCenter := NewQuotaCenter(pcm, qc, dc, core.tsoAllocator, meta)
go quotaCenter.run()
quotaCenter.Start()
time.Sleep(10 * time.Millisecond)
quotaCenter.stop()
})
t.Run("test QuotaCenter stop", func(t *testing.T) {
qc := mocks.NewMockQueryCoordClient(t)
meta := mockrootcoord.NewIMetaTable(t)
paramtable.Get().Save(paramtable.Get().QuotaConfig.QuotaCenterCollectInterval.Key, "1")
defer paramtable.Get().Reset(paramtable.Get().QuotaConfig.QuotaCenterCollectInterval.Key)
qc.ExpectedCalls = nil
// mock query coord stuck for at most 10s
qc.EXPECT().GetMetrics(mock.Anything, mock.Anything).RunAndReturn(func(ctx context.Context, gmr *milvuspb.GetMetricsRequest, co ...grpc.CallOption) (*milvuspb.GetMetricsResponse, error) {
counter := 0
for {
select {
case <-ctx.Done():
return nil, merr.ErrCollectionNotFound
default:
if counter < 10 {
time.Sleep(1 * time.Second)
counter++
}
}
}
})
meta.EXPECT().GetCollectionByID(mock.Anything, mock.Anything, mock.Anything, mock.Anything, mock.Anything).Return(nil, merr.ErrCollectionNotFound).Maybe()
quotaCenter := NewQuotaCenter(pcm, qc, dc, core.tsoAllocator, meta)
quotaCenter.Start()
time.Sleep(3 * time.Second)
// assert stop won't stuck more than 5s
start := time.Now()
quotaCenter.stop()
assert.True(t, time.Since(start).Seconds() <= 5)
})
t.Run("test syncMetrics", func(t *testing.T) {
qc := mocks.NewMockQueryCoordClient(t)
meta := mockrootcoord.NewIMetaTable(t)

View File

@ -668,7 +668,7 @@ func (c *Core) startInternal() error {
}
if Params.QuotaConfig.QuotaAndLimitsEnabled.GetAsBool() {
go c.quotaCenter.run()
c.quotaCenter.Start()
}
c.scheduler.Start()