2021-11-08 19:49:07 +08:00
// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
2024-06-14 14:34:01 +08:00
package compaction
2021-11-08 19:49:07 +08:00
import (
"context"
2024-04-10 15:09:18 +08:00
"sync"
2021-11-08 19:49:07 +08:00
2024-01-19 11:34:54 +08:00
"github.com/samber/lo"
2021-11-08 19:49:07 +08:00
"go.uber.org/zap"
2024-06-11 17:25:55 +08:00
"golang.org/x/sync/semaphore"
2022-02-28 19:11:55 +08:00
2022-10-20 16:39:29 +08:00
"github.com/milvus-io/milvus/internal/proto/datapb"
2023-04-06 19:14:32 +08:00
"github.com/milvus-io/milvus/pkg/log"
2024-07-18 10:27:41 +08:00
"github.com/milvus-io/milvus/pkg/util/merr"
2024-06-14 14:34:01 +08:00
"github.com/milvus-io/milvus/pkg/util/paramtable"
2023-07-24 10:23:01 +08:00
"github.com/milvus-io/milvus/pkg/util/typeutil"
2021-11-08 19:49:07 +08:00
)
// Sizing limits of the compaction executor.
const (
	// maxTaskQueueNum is the buffer size of the channel that queues accepted
	// compaction tasks before they are picked up for execution.
	maxTaskQueueNum = 1024

	// maxParallelTaskNum is the weight of the semaphore that limits how many
	// compaction tasks run at the same time.
	maxParallelTaskNum = 10
)
2024-06-14 14:34:01 +08:00
type Executor interface {
Start ( ctx context . Context )
2024-07-18 10:27:41 +08:00
Execute ( task Compactor ) ( bool , error )
2024-06-14 14:34:01 +08:00
Slots ( ) int64
RemoveTask ( planID int64 )
GetResults ( planID int64 ) [ ] * datapb . CompactionPlanResult
DiscardByDroppedChannel ( channel string )
DiscardPlan ( channel string )
}
type executor struct {
executing * typeutil . ConcurrentMap [ int64 , Compactor ] // planID to compactor
completedCompactor * typeutil . ConcurrentMap [ int64 , Compactor ] // planID to compactor
2023-11-14 15:56:19 +08:00
completed * typeutil . ConcurrentMap [ int64 , * datapb . CompactionPlanResult ] // planID to CompactionPlanResult
2024-06-14 14:34:01 +08:00
taskCh chan Compactor
2024-07-18 10:27:41 +08:00
taskSem * semaphore . Weighted // todo remove this, unify with slot logic
2023-07-24 10:23:01 +08:00
dropped * typeutil . ConcurrentSet [ string ] // vchannel dropped
2024-07-18 10:27:41 +08:00
usingSlots int64
slotMu sync . RWMutex
2024-04-10 15:09:18 +08:00
// To prevent concurrency of release channel and compaction get results
// all released channel's compaction tasks will be discarded
resultGuard sync . RWMutex
2021-11-08 19:49:07 +08:00
}
2024-06-14 14:34:01 +08:00
func NewExecutor ( ) * executor {
return & executor {
executing : typeutil . NewConcurrentMap [ int64 , Compactor ] ( ) ,
completedCompactor : typeutil . NewConcurrentMap [ int64 , Compactor ] ( ) ,
2023-11-14 15:56:19 +08:00
completed : typeutil . NewConcurrentMap [ int64 , * datapb . CompactionPlanResult ] ( ) ,
2024-06-14 14:34:01 +08:00
taskCh : make ( chan Compactor , maxTaskQueueNum ) ,
2024-06-11 17:25:55 +08:00
taskSem : semaphore . NewWeighted ( maxParallelTaskNum ) ,
2023-07-24 10:23:01 +08:00
dropped : typeutil . NewConcurrentSet [ string ] ( ) ,
2024-07-18 10:27:41 +08:00
usingSlots : 0 ,
2021-11-08 19:49:07 +08:00
}
}
2024-07-18 10:27:41 +08:00
func ( e * executor ) Execute ( task Compactor ) ( bool , error ) {
e . slotMu . Lock ( )
defer e . slotMu . Unlock ( )
if paramtable . Get ( ) . DataNodeCfg . SlotCap . GetAsInt64 ( ) - e . usingSlots >= task . GetSlotUsage ( ) {
newSlotUsage := task . GetSlotUsage ( )
// compatible for old datacoord or unexpected request
if task . GetSlotUsage ( ) <= 0 {
switch task . GetCompactionType ( ) {
case datapb . CompactionType_ClusteringCompaction :
newSlotUsage = paramtable . Get ( ) . DataCoordCfg . ClusteringCompactionSlotUsage . GetAsInt64 ( )
case datapb . CompactionType_MixCompaction :
newSlotUsage = paramtable . Get ( ) . DataCoordCfg . MixCompactionSlotUsage . GetAsInt64 ( )
case datapb . CompactionType_Level0DeleteCompaction :
newSlotUsage = paramtable . Get ( ) . DataCoordCfg . L0DeleteCompactionSlotUsage . GetAsInt64 ( )
}
log . Warn ( "illegal task slot usage, change it to a default value" , zap . Int64 ( "illegalSlotUsage" , task . GetSlotUsage ( ) ) , zap . Int64 ( "newSlotUsage" , newSlotUsage ) )
}
e . usingSlots = e . usingSlots + newSlotUsage
} else {
return false , merr . WrapErrDataNodeSlotExhausted ( )
}
2024-06-16 22:09:57 +08:00
_ , ok := e . executing . GetOrInsert ( task . GetPlanID ( ) , task )
if ok {
log . Warn ( "duplicated compaction task" ,
zap . Int64 ( "planID" , task . GetPlanID ( ) ) ,
zap . String ( "channel" , task . GetChannelName ( ) ) )
2024-07-18 10:27:41 +08:00
return false , merr . WrapErrDuplicatedCompactionTask ( )
2024-06-16 22:09:57 +08:00
}
2024-06-14 14:34:01 +08:00
e . taskCh <- task
2024-07-18 10:27:41 +08:00
return true , nil
2024-06-14 14:34:01 +08:00
}
func ( e * executor ) Slots ( ) int64 {
2024-07-18 10:27:41 +08:00
return paramtable . Get ( ) . DataNodeCfg . SlotCap . GetAsInt64 ( ) - e . getUsingSlots ( )
}
func ( e * executor ) getUsingSlots ( ) int64 {
e . slotMu . RLock ( )
defer e . slotMu . RUnlock ( )
return e . usingSlots
2021-11-08 19:49:07 +08:00
}
2024-06-14 14:34:01 +08:00
func ( e * executor ) toCompleteState ( task Compactor ) {
2024-05-23 09:53:40 +08:00
task . Complete ( )
2024-07-18 10:27:41 +08:00
e . getAndRemoveExecuting ( task . GetPlanID ( ) )
}
func ( e * executor ) getAndRemoveExecuting ( planID typeutil . UniqueID ) ( Compactor , bool ) {
task , ok := e . executing . GetAndRemove ( planID )
if ok {
e . slotMu . Lock ( )
e . usingSlots = e . usingSlots - task . GetSlotUsage ( )
e . slotMu . Unlock ( )
}
return task , ok
2022-01-18 17:49:39 +08:00
}
2023-06-20 10:20:41 +08:00
2024-06-14 14:34:01 +08:00
func ( e * executor ) RemoveTask ( planID int64 ) {
e . completed . GetAndRemove ( planID )
task , loaded := e . completedCompactor . GetAndRemove ( planID )
2023-03-17 17:27:56 +08:00
if loaded {
2024-05-24 09:07:41 +08:00
log . Info ( "Compaction task removed" , zap . Int64 ( "planID" , planID ) , zap . String ( "channel" , task . GetChannelName ( ) ) )
2023-03-17 17:27:56 +08:00
}
}
2022-01-18 17:49:39 +08:00
2024-06-14 14:34:01 +08:00
func ( e * executor ) Start ( ctx context . Context ) {
2021-11-08 19:49:07 +08:00
for {
select {
case <- ctx . Done ( ) :
return
2024-06-14 14:34:01 +08:00
case task := <- e . taskCh :
err := e . taskSem . Acquire ( ctx , 1 )
2024-06-11 17:25:55 +08:00
if err != nil {
return
}
go func ( ) {
2024-06-14 14:34:01 +08:00
defer e . taskSem . Release ( 1 )
e . executeTask ( task )
2024-06-11 17:25:55 +08:00
} ( )
2021-11-08 19:49:07 +08:00
}
}
}
2024-06-14 14:34:01 +08:00
func ( e * executor ) executeTask ( task Compactor ) {
2024-02-01 14:25:04 +08:00
log := log . With (
2024-05-23 09:53:40 +08:00
zap . Int64 ( "planID" , task . GetPlanID ( ) ) ,
zap . Int64 ( "Collection" , task . GetCollection ( ) ) ,
zap . String ( "channel" , task . GetChannelName ( ) ) ,
2024-02-01 14:25:04 +08:00
)
2021-11-08 19:49:07 +08:00
defer func ( ) {
2024-06-14 14:34:01 +08:00
e . toCompleteState ( task )
2021-11-08 19:49:07 +08:00
} ( )
2024-02-01 14:25:04 +08:00
log . Info ( "start to execute compaction" )
2021-11-08 19:49:07 +08:00
2024-05-23 09:53:40 +08:00
result , err := task . Compact ( )
2021-11-08 19:49:07 +08:00
if err != nil {
2024-02-01 14:25:04 +08:00
log . Warn ( "compaction task failed" , zap . Error ( err ) )
2024-05-24 09:07:41 +08:00
return
2021-11-08 19:49:07 +08:00
}
2024-06-14 14:34:01 +08:00
e . completed . Insert ( result . GetPlanID ( ) , result )
e . completedCompactor . Insert ( result . GetPlanID ( ) , task )
2021-11-08 19:49:07 +08:00
2024-05-23 09:53:40 +08:00
log . Info ( "end to execute compaction" )
2021-11-08 19:49:07 +08:00
}
2021-11-11 20:56:49 +08:00
2024-06-14 14:34:01 +08:00
func ( e * executor ) stopTask ( planID int64 ) {
2024-07-18 10:27:41 +08:00
task , loaded := e . getAndRemoveExecuting ( planID )
2021-11-11 20:56:49 +08:00
if loaded {
2024-05-23 09:53:40 +08:00
log . Warn ( "compaction executor stop task" , zap . Int64 ( "planID" , planID ) , zap . String ( "vChannelName" , task . GetChannelName ( ) ) )
task . Stop ( )
2021-11-11 20:56:49 +08:00
}
}
2024-06-14 14:34:01 +08:00
func ( e * executor ) isValidChannel ( channel string ) bool {
2021-12-02 16:39:33 +08:00
// if vchannel marked dropped, compaction should not proceed
2024-06-14 14:34:01 +08:00
return ! e . dropped . Contain ( channel )
2021-12-02 16:39:33 +08:00
}
2024-06-14 14:34:01 +08:00
func ( e * executor ) DiscardByDroppedChannel ( channel string ) {
e . dropped . Insert ( channel )
e . DiscardPlan ( channel )
2024-04-10 15:09:18 +08:00
}
2024-06-14 14:34:01 +08:00
func ( e * executor ) DiscardPlan ( channel string ) {
e . resultGuard . Lock ( )
defer e . resultGuard . Unlock ( )
2023-11-29 10:50:29 +08:00
2024-06-14 14:34:01 +08:00
e . executing . Range ( func ( planID int64 , task Compactor ) bool {
2024-05-23 09:53:40 +08:00
if task . GetChannelName ( ) == channel {
2024-06-14 14:34:01 +08:00
e . stopTask ( planID )
2021-11-11 20:56:49 +08:00
}
return true
} )
2023-11-14 15:56:19 +08:00
2023-11-29 10:50:29 +08:00
// remove all completed plans of channel
2024-06-14 14:34:01 +08:00
e . completed . Range ( func ( planID int64 , result * datapb . CompactionPlanResult ) bool {
2023-11-29 10:50:29 +08:00
if result . GetChannel ( ) == channel {
2024-06-14 14:34:01 +08:00
e . RemoveTask ( planID )
2024-04-10 15:09:18 +08:00
log . Info ( "remove compaction plan and results" ,
2023-11-29 10:50:29 +08:00
zap . String ( "channel" , channel ) ,
2023-07-24 10:23:01 +08:00
zap . Int64 ( "planID" , planID ) )
2022-10-20 16:39:29 +08:00
}
return true
} )
2021-11-11 20:56:49 +08:00
}
2023-11-14 15:56:19 +08:00
2024-06-14 14:34:01 +08:00
// GetResults returns the results of all known plans when planID is 0, and a
// single-element slice holding the result of planID otherwise.
func (e *executor) GetResults(planID int64) []*datapb.CompactionPlanResult {
	if planID == 0 {
		return e.getAllCompactionResults()
	}
	return []*datapb.CompactionPlanResult{e.getCompactionResult(planID)}
}
func ( e * executor ) getCompactionResult ( planID int64 ) * datapb . CompactionPlanResult {
e . resultGuard . RLock ( )
defer e . resultGuard . RUnlock ( )
_ , ok := e . executing . Get ( planID )
2024-06-05 10:17:50 +08:00
if ok {
result := & datapb . CompactionPlanResult {
2024-06-10 21:34:08 +08:00
State : datapb . CompactionTaskState_executing ,
2024-06-05 10:17:50 +08:00
PlanID : planID ,
}
return result
}
2024-06-14 14:34:01 +08:00
result , ok2 := e . completed . Get ( planID )
2024-06-05 10:17:50 +08:00
if ! ok2 {
2024-06-10 21:34:08 +08:00
return & datapb . CompactionPlanResult {
PlanID : planID ,
State : datapb . CompactionTaskState_failed ,
}
2024-06-05 10:17:50 +08:00
}
return result
}
2024-06-14 14:34:01 +08:00
func ( e * executor ) getAllCompactionResults ( ) [ ] * datapb . CompactionPlanResult {
e . resultGuard . RLock ( )
defer e . resultGuard . RUnlock ( )
2024-01-19 11:34:54 +08:00
var (
executing [ ] int64
completed [ ] int64
completedLevelZero [ ] int64
)
2023-11-14 15:56:19 +08:00
results := make ( [ ] * datapb . CompactionPlanResult , 0 )
2023-11-29 10:50:29 +08:00
// get executing results
2024-06-14 14:34:01 +08:00
e . executing . Range ( func ( planID int64 , task Compactor ) bool {
2024-01-19 11:34:54 +08:00
executing = append ( executing , planID )
2023-11-14 15:56:19 +08:00
results = append ( results , & datapb . CompactionPlanResult {
2024-06-10 21:34:08 +08:00
State : datapb . CompactionTaskState_executing ,
2023-11-14 15:56:19 +08:00
PlanID : planID ,
} )
return true
} )
2023-11-29 10:50:29 +08:00
// get completed results
2024-06-14 14:34:01 +08:00
e . completed . Range ( func ( planID int64 , result * datapb . CompactionPlanResult ) bool {
2024-01-19 11:34:54 +08:00
completed = append ( completed , planID )
2023-11-14 15:56:19 +08:00
results = append ( results , result )
2024-01-19 11:34:54 +08:00
if result . GetType ( ) == datapb . CompactionType_Level0DeleteCompaction {
completedLevelZero = append ( completedLevelZero , planID )
}
2023-11-14 15:56:19 +08:00
return true
} )
2024-05-21 11:35:38 +08:00
// remove level zero results
2024-01-19 11:34:54 +08:00
lo . ForEach ( completedLevelZero , func ( planID int64 , _ int ) {
2024-06-14 14:34:01 +08:00
e . completed . Remove ( planID )
e . completedCompactor . Remove ( planID )
2024-01-19 11:34:54 +08:00
} )
if len ( results ) > 0 {
log . Info ( "DataNode Compaction results" ,
zap . Int64s ( "executing" , executing ) ,
zap . Int64s ( "completed" , completed ) ,
zap . Int64s ( "completed levelzero" , completedLevelZero ) ,
)
}
2023-11-14 15:56:19 +08:00
return results
}