enhance: Add concurrency for datacoord segment GC (#29561)

issue: https://github.com/milvus-io/milvus/issues/29553
/kind improvement

Signed-off-by: SimFG <bang.fu@zilliz.com>
This commit is contained in:
SimFG 2024-01-03 13:16:57 +08:00 committed by GitHub
parent b12c90af72
commit d23f87a393
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 180 additions and 52 deletions

View File

@ -36,6 +36,7 @@ import (
"github.com/milvus-io/milvus/pkg/common" "github.com/milvus-io/milvus/pkg/common"
"github.com/milvus-io/milvus/pkg/log" "github.com/milvus-io/milvus/pkg/log"
"github.com/milvus-io/milvus/pkg/metrics" "github.com/milvus-io/milvus/pkg/metrics"
"github.com/milvus-io/milvus/pkg/util/conc"
"github.com/milvus-io/milvus/pkg/util/merr" "github.com/milvus-io/milvus/pkg/util/merr"
"github.com/milvus-io/milvus/pkg/util/metautil" "github.com/milvus-io/milvus/pkg/util/metautil"
"github.com/milvus-io/milvus/pkg/util/paramtable" "github.com/milvus-io/milvus/pkg/util/paramtable"
@ -49,6 +50,8 @@ type GcOption struct {
checkInterval time.Duration // each interval checkInterval time.Duration // each interval
missingTolerance time.Duration // key missing in meta tolerance time missingTolerance time.Duration // key missing in meta tolerance time
dropTolerance time.Duration // dropped segment related key tolerance time dropTolerance time.Duration // dropped segment related key tolerance time
removeLogPool *conc.Pool[struct{}]
} }
// garbageCollector handles garbage files in object storage // garbageCollector handles garbage files in object storage
@ -75,6 +78,7 @@ type gcCmd struct {
func newGarbageCollector(meta *meta, handler Handler, opt GcOption) *garbageCollector { func newGarbageCollector(meta *meta, handler Handler, opt GcOption) *garbageCollector {
log.Info("GC with option", zap.Bool("enabled", opt.enabled), zap.Duration("interval", opt.checkInterval), log.Info("GC with option", zap.Bool("enabled", opt.enabled), zap.Duration("interval", opt.checkInterval),
zap.Duration("missingTolerance", opt.missingTolerance), zap.Duration("dropTolerance", opt.dropTolerance)) zap.Duration("missingTolerance", opt.missingTolerance), zap.Duration("dropTolerance", opt.dropTolerance))
opt.removeLogPool = conc.NewPool[struct{}](Params.DataCoordCfg.GCRemoveConcurrent.GetAsInt(), conc.WithExpiryDuration(time.Minute))
return &garbageCollector{ return &garbageCollector{
meta: meta, meta: meta,
handler: handler, handler: handler,
@ -353,6 +357,7 @@ func (gc *garbageCollector) clearEtcd() {
return dropIDs[i] < dropIDs[j] return dropIDs[i] < dropIDs[j]
}) })
log.Info("start to GC segments", zap.Int("drop_num", len(dropIDs)))
for _, segmentID := range dropIDs { for _, segmentID := range dropIDs {
segment, ok := drops[segmentID] segment, ok := drops[segmentID]
if !ok { if !ok {
@ -366,7 +371,10 @@ func (gc *garbageCollector) clearEtcd() {
} }
logs := getLogs(segment) logs := getLogs(segment)
log.Info("GC segment", zap.Int64("segmentID", segment.GetID())) log.Info("GC segment", zap.Int64("segmentID", segment.GetID()),
zap.Int("insert_logs", len(segment.GetBinlogs())),
zap.Int("delta_logs", len(segment.GetDeltalogs())),
zap.Int("stats_logs", len(segment.GetStatslogs())))
if gc.removeLogs(logs) { if gc.removeLogs(logs) {
err := gc.meta.DropSegment(segment.GetID()) err := gc.meta.DropSegment(segment.GetID())
if err != nil { if err != nil {
@ -409,22 +417,39 @@ func getLogs(sinfo *SegmentInfo) []*datapb.Binlog {
func (gc *garbageCollector) removeLogs(logs []*datapb.Binlog) bool { func (gc *garbageCollector) removeLogs(logs []*datapb.Binlog) bool {
ctx, cancel := context.WithCancel(context.Background()) ctx, cancel := context.WithCancel(context.Background())
defer cancel() defer cancel()
delFlag := true var w sync.WaitGroup
w.Add(len(logs))
for _, l := range logs { for _, l := range logs {
err := gc.option.cli.Remove(ctx, l.GetLogPath()) tmpLog := l
if err != nil { gc.option.removeLogPool.Submit(func() (struct{}, error) {
switch err.(type) { defer w.Done()
case minio.ErrorResponse: select {
errResp := minio.ToErrorResponse(err) case <-ctx.Done():
if errResp.Code != "" && errResp.Code != "NoSuchKey" { return struct{}{}, nil
delFlag = false
}
default: default:
delFlag = false err := gc.option.cli.Remove(ctx, tmpLog.GetLogPath())
if err != nil {
switch err.(type) {
case minio.ErrorResponse:
errResp := minio.ToErrorResponse(err)
if errResp.Code != "" && errResp.Code != "NoSuchKey" {
cancel()
}
default:
cancel()
}
}
return struct{}{}, nil
} }
} })
}
w.Wait()
select {
case <-ctx.Done():
return false
default:
return true
} }
return delFlag
} }
func (gc *garbageCollector) recycleUnusedIndexes() { func (gc *garbageCollector) recycleUnusedIndexes() {

View File

@ -404,9 +404,7 @@ func TestGarbageCollector_recycleUnusedIndexes(t *testing.T) {
mock.Anything, mock.Anything,
mock.Anything, mock.Anything,
).Return(nil) ).Return(nil)
gc := &garbageCollector{ gc := newGarbageCollector(createMetaForRecycleUnusedIndexes(catalog), nil, GcOption{})
meta: createMetaForRecycleUnusedIndexes(catalog),
}
gc.recycleUnusedIndexes() gc.recycleUnusedIndexes()
}) })
@ -417,9 +415,7 @@ func TestGarbageCollector_recycleUnusedIndexes(t *testing.T) {
mock.Anything, mock.Anything,
mock.Anything, mock.Anything,
).Return(errors.New("fail")) ).Return(errors.New("fail"))
gc := &garbageCollector{ gc := newGarbageCollector(createMetaForRecycleUnusedIndexes(catalog), nil, GcOption{})
meta: createMetaForRecycleUnusedIndexes(catalog),
}
gc.recycleUnusedIndexes() gc.recycleUnusedIndexes()
}) })
} }
@ -545,9 +541,7 @@ func TestGarbageCollector_recycleUnusedSegIndexes(t *testing.T) {
mock.Anything, mock.Anything,
mock.Anything, mock.Anything,
).Return(nil) ).Return(nil)
gc := &garbageCollector{ gc := newGarbageCollector(createMetaForRecycleUnusedSegIndexes(catalog), nil, GcOption{})
meta: createMetaForRecycleUnusedSegIndexes(catalog),
}
gc.recycleUnusedSegIndexes() gc.recycleUnusedSegIndexes()
}) })
@ -560,9 +554,7 @@ func TestGarbageCollector_recycleUnusedSegIndexes(t *testing.T) {
mock.Anything, mock.Anything,
mock.Anything, mock.Anything,
).Return(errors.New("fail")) ).Return(errors.New("fail"))
gc := &garbageCollector{ gc := newGarbageCollector(createMetaForRecycleUnusedSegIndexes(catalog), nil, GcOption{})
meta: createMetaForRecycleUnusedSegIndexes(catalog),
}
gc.recycleUnusedSegIndexes() gc.recycleUnusedSegIndexes()
}) })
} }
@ -707,12 +699,13 @@ func TestGarbageCollector_recycleUnusedIndexFiles(t *testing.T) {
cm.EXPECT().ListWithPrefix(mock.Anything, mock.Anything, mock.Anything).Return([]string{"a/b/c/", "a/b/600/", "a/b/601/", "a/b/602/"}, nil, nil) cm.EXPECT().ListWithPrefix(mock.Anything, mock.Anything, mock.Anything).Return([]string{"a/b/c/", "a/b/600/", "a/b/601/", "a/b/602/"}, nil, nil)
cm.EXPECT().RemoveWithPrefix(mock.Anything, mock.Anything).Return(nil) cm.EXPECT().RemoveWithPrefix(mock.Anything, mock.Anything).Return(nil)
cm.EXPECT().Remove(mock.Anything, mock.Anything).Return(nil) cm.EXPECT().Remove(mock.Anything, mock.Anything).Return(nil)
gc := &garbageCollector{ gc := newGarbageCollector(
meta: createMetaTableForRecycleUnusedIndexFiles(&datacoord.Catalog{MetaKv: kvmocks.NewMetaKv(t)}), createMetaTableForRecycleUnusedIndexFiles(&datacoord.Catalog{MetaKv: kvmocks.NewMetaKv(t)}),
option: GcOption{ nil,
GcOption{
cli: cm, cli: cm,
}, })
}
gc.recycleUnusedIndexFiles() gc.recycleUnusedIndexFiles()
}) })
@ -720,12 +713,12 @@ func TestGarbageCollector_recycleUnusedIndexFiles(t *testing.T) {
cm := &mocks.ChunkManager{} cm := &mocks.ChunkManager{}
cm.EXPECT().RootPath().Return("root") cm.EXPECT().RootPath().Return("root")
cm.EXPECT().ListWithPrefix(mock.Anything, mock.Anything, mock.Anything).Return(nil, nil, errors.New("error")) cm.EXPECT().ListWithPrefix(mock.Anything, mock.Anything, mock.Anything).Return(nil, nil, errors.New("error"))
gc := &garbageCollector{ gc := newGarbageCollector(
meta: createMetaTableForRecycleUnusedIndexFiles(&datacoord.Catalog{MetaKv: kvmocks.NewMetaKv(t)}), createMetaTableForRecycleUnusedIndexFiles(&datacoord.Catalog{MetaKv: kvmocks.NewMetaKv(t)}),
option: GcOption{ nil,
GcOption{
cli: cm, cli: cm,
}, })
}
gc.recycleUnusedIndexFiles() gc.recycleUnusedIndexFiles()
}) })
@ -735,12 +728,12 @@ func TestGarbageCollector_recycleUnusedIndexFiles(t *testing.T) {
cm.EXPECT().Remove(mock.Anything, mock.Anything).Return(errors.New("error")) cm.EXPECT().Remove(mock.Anything, mock.Anything).Return(errors.New("error"))
cm.EXPECT().ListWithPrefix(mock.Anything, mock.Anything, mock.Anything).Return([]string{"a/b/c/", "a/b/600/", "a/b/601/", "a/b/602/"}, nil, nil) cm.EXPECT().ListWithPrefix(mock.Anything, mock.Anything, mock.Anything).Return([]string{"a/b/c/", "a/b/600/", "a/b/601/", "a/b/602/"}, nil, nil)
cm.EXPECT().RemoveWithPrefix(mock.Anything, mock.Anything).Return(nil) cm.EXPECT().RemoveWithPrefix(mock.Anything, mock.Anything).Return(nil)
gc := &garbageCollector{ gc := newGarbageCollector(
meta: createMetaTableForRecycleUnusedIndexFiles(&datacoord.Catalog{MetaKv: kvmocks.NewMetaKv(t)}), createMetaTableForRecycleUnusedIndexFiles(&datacoord.Catalog{MetaKv: kvmocks.NewMetaKv(t)}),
option: GcOption{ nil,
GcOption{
cli: cm, cli: cm,
}, })
}
gc.recycleUnusedIndexFiles() gc.recycleUnusedIndexFiles()
}) })
@ -750,12 +743,12 @@ func TestGarbageCollector_recycleUnusedIndexFiles(t *testing.T) {
cm.EXPECT().Remove(mock.Anything, mock.Anything).Return(errors.New("error")) cm.EXPECT().Remove(mock.Anything, mock.Anything).Return(errors.New("error"))
cm.EXPECT().ListWithPrefix(mock.Anything, mock.Anything, mock.Anything).Return([]string{"a/b/c/", "a/b/600/", "a/b/601/", "a/b/602/"}, nil, nil) cm.EXPECT().ListWithPrefix(mock.Anything, mock.Anything, mock.Anything).Return([]string{"a/b/c/", "a/b/600/", "a/b/601/", "a/b/602/"}, nil, nil)
cm.EXPECT().RemoveWithPrefix(mock.Anything, mock.Anything).Return(errors.New("error")) cm.EXPECT().RemoveWithPrefix(mock.Anything, mock.Anything).Return(errors.New("error"))
gc := &garbageCollector{ gc := newGarbageCollector(
meta: createMetaTableForRecycleUnusedIndexFiles(&datacoord.Catalog{MetaKv: kvmocks.NewMetaKv(t)}), createMetaTableForRecycleUnusedIndexFiles(&datacoord.Catalog{MetaKv: kvmocks.NewMetaKv(t)}),
option: GcOption{ nil,
GcOption{
cli: cm, cli: cm,
}, })
}
gc.recycleUnusedIndexFiles() gc.recycleUnusedIndexFiles()
}) })
} }
@ -804,6 +797,57 @@ func TestGarbageCollector_clearETCD(t *testing.T) {
DmlPosition: &msgpb.MsgPosition{ DmlPosition: &msgpb.MsgPosition{
Timestamp: 900, Timestamp: 900,
}, },
Binlogs: []*datapb.FieldBinlog{
{
FieldID: 1,
Binlogs: []*datapb.Binlog{
{
LogPath: "log1",
LogSize: 1024,
},
},
},
{
FieldID: 2,
Binlogs: []*datapb.Binlog{
{
LogPath: "log2",
LogSize: 1024,
},
},
},
},
Deltalogs: []*datapb.FieldBinlog{
{
FieldID: 1,
Binlogs: []*datapb.Binlog{
{
LogPath: "del_log1",
LogSize: 1024,
},
},
},
{
FieldID: 2,
Binlogs: []*datapb.Binlog{
{
LogPath: "del_log2",
LogSize: 1024,
},
},
},
},
Statslogs: []*datapb.FieldBinlog{
{
FieldID: 1,
Binlogs: []*datapb.Binlog{
{
LogPath: "stats_log1",
LogSize: 1024,
},
},
},
},
}, },
segmentIndexes: map[UniqueID]*model.SegmentIndex{ segmentIndexes: map[UniqueID]*model.SegmentIndex{
indexID: { indexID: {
@ -1045,14 +1089,13 @@ func TestGarbageCollector_clearETCD(t *testing.T) {
} }
cm := &mocks.ChunkManager{} cm := &mocks.ChunkManager{}
cm.EXPECT().Remove(mock.Anything, mock.Anything).Return(nil) cm.EXPECT().Remove(mock.Anything, mock.Anything).Return(nil)
gc := &garbageCollector{ gc := newGarbageCollector(
option: GcOption{ m,
cli: &mocks.ChunkManager{}, newMockHandlerWithMeta(m),
GcOption{
cli: cm,
dropTolerance: 1, dropTolerance: 1,
}, })
meta: m,
handler: newMockHandlerWithMeta(m),
}
gc.clearEtcd() gc.clearEtcd()
/* /*
@ -1136,6 +1179,56 @@ func TestGarbageCollector_clearETCD(t *testing.T) {
assert.Nil(t, segB) assert.Nil(t, segB)
} }
func TestGarbageCollector_removelogs(t *testing.T) {
paramtable.Init()
cm := &mocks.ChunkManager{}
gc := newGarbageCollector(
nil,
nil,
GcOption{
cli: cm,
dropTolerance: 1,
})
var logs []*datapb.Binlog
for i := 0; i < 50; i++ {
logs = append(logs, &datapb.Binlog{
LogPath: "log" + strconv.Itoa(i),
})
}
t.Run("success", func(t *testing.T) {
call := cm.EXPECT().Remove(mock.Anything, mock.Anything).Return(nil)
defer call.Unset()
b := gc.removeLogs(logs)
assert.True(t, b)
})
t.Run("minio not found error", func(t *testing.T) {
call := cm.EXPECT().Remove(mock.Anything, mock.Anything).Return(minio.ErrorResponse{
Code: "NoSuchKey",
})
defer call.Unset()
b := gc.removeLogs(logs)
assert.True(t, b)
})
t.Run("minio server error", func(t *testing.T) {
call := cm.EXPECT().Remove(mock.Anything, mock.Anything).Return(minio.ErrorResponse{
Code: "Server Error",
})
defer call.Unset()
b := gc.removeLogs(logs)
assert.False(t, b)
})
t.Run("other type error", func(t *testing.T) {
call := cm.EXPECT().Remove(mock.Anything, mock.Anything).Return(errors.New("other error"))
defer call.Unset()
b := gc.removeLogs(logs)
assert.False(t, b)
})
}
type GarbageCollectorSuite struct { type GarbageCollectorSuite struct {
suite.Suite suite.Suite

View File

@ -2238,6 +2238,7 @@ type dataCoordConfig struct {
GCInterval ParamItem `refreshable:"false"` GCInterval ParamItem `refreshable:"false"`
GCMissingTolerance ParamItem `refreshable:"false"` GCMissingTolerance ParamItem `refreshable:"false"`
GCDropTolerance ParamItem `refreshable:"false"` GCDropTolerance ParamItem `refreshable:"false"`
GCRemoveConcurrent ParamItem `refreshable:"false"`
EnableActiveStandby ParamItem `refreshable:"false"` EnableActiveStandby ParamItem `refreshable:"false"`
BindIndexNodeMode ParamItem `refreshable:"false"` BindIndexNodeMode ParamItem `refreshable:"false"`
@ -2587,6 +2588,15 @@ During compaction, the size of segment # of rows is able to exceed segment max #
} }
p.GCDropTolerance.Init(base.mgr) p.GCDropTolerance.Init(base.mgr)
p.GCRemoveConcurrent = ParamItem{
Key: "dataCoord.gc.removeConcurrent",
Version: "2.3.4",
DefaultValue: "32",
Doc: "number of concurrent goroutines to remove dropped s3 objects",
Export: true,
}
p.GCRemoveConcurrent.Init(base.mgr)
p.EnableActiveStandby = ParamItem{ p.EnableActiveStandby = ParamItem{
Key: "dataCoord.enableActiveStandby", Key: "dataCoord.enableActiveStandby",
Version: "2.0.0", Version: "2.0.0",