milvus/internal/datacoord/garbage_collector_test.go
yihao.dai c411cb4a49
enhance: Prevent the backlog of channelCP update tasks, perform batch updates of channelCPs (#30941)
This PR includes the following adjustments:
1. To prevent a backlog of channelCP update tasks, only one task per
vchannel is retained in the updater. Additionally, lastUpdateTime is now
refreshed when the flowgraph submits the update task, rather than in
the callback function.
2. Multiple vchannel checkpoints are now updated in a single batched
UpdateChannelCheckpoint RPC (default batch size: 128). Additionally, the
lock for channelCPs in DataCoord meta has been switched from a key
lock to a global lock.
3. The concurrency of UpdateChannelCheckpoint RPCs in the datanode has
been reduced from 1000 to 10.
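
A minimal sketch of points 1 and 2 (all types and names below are hypothetical, not the actual Milvus implementation): the updater keeps at most one pending task per vchannel, so stale tasks cannot pile up, and a flush drains the pending set in batches that would back a single UpdateChannelCheckpoint RPC.

package main

import (
	"fmt"
	"sync"
)

// cpTask is a hypothetical checkpoint-update task for one vchannel.
type cpTask struct {
	vchannel string
	ts       uint64
}

// cpUpdater deduplicates tasks by vchannel and flushes them in batches.
type cpUpdater struct {
	mu        sync.Mutex
	pending   map[string]*cpTask // at most one pending task per vchannel
	batchSize int
}

func newCPUpdater(batchSize int) *cpUpdater {
	return &cpUpdater{pending: make(map[string]*cpTask), batchSize: batchSize}
}

// submit keeps only the newest checkpoint per vchannel (point 1).
func (u *cpUpdater) submit(t *cpTask) {
	u.mu.Lock()
	defer u.mu.Unlock()
	if old, ok := u.pending[t.vchannel]; !ok || t.ts > old.ts {
		u.pending[t.vchannel] = t
	}
}

// flush drains all pending tasks and hands them out in batches of at most
// batchSize, standing in for the batched RPC of point 2 (default 128).
func (u *cpUpdater) flush(send func(batch []*cpTask)) {
	u.mu.Lock()
	tasks := make([]*cpTask, 0, len(u.pending))
	for _, t := range u.pending {
		tasks = append(tasks, t)
	}
	u.pending = make(map[string]*cpTask)
	u.mu.Unlock()
	for i := 0; i < len(tasks); i += u.batchSize {
		end := i + u.batchSize
		if end > len(tasks) {
			end = len(tasks)
		}
		send(tasks[i:end])
	}
}

func main() {
	u := newCPUpdater(128)
	u.submit(&cpTask{vchannel: "ch1", ts: 100})
	u.submit(&cpTask{vchannel: "ch1", ts: 200}) // replaces the stale ch1 task
	u.submit(&cpTask{vchannel: "ch2", ts: 150})
	u.flush(func(batch []*cpTask) {
		fmt.Println("UpdateChannelCheckpoint with", len(batch), "checkpoints")
	})
}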

issue: https://github.com/milvus-io/milvus/issues/30004

---------

Signed-off-by: bigsheeper <yihao.dai@zilliz.com>
Co-authored-by: jaime <yun.zhang@zilliz.com>
Co-authored-by: congqixia <congqi.xia@zilliz.com>
2024-03-07 20:39:02 +08:00


// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package datacoord
import (
"bytes"
"context"
"fmt"
"math/rand"
"path"
"strconv"
"strings"
"sync"
"testing"
"time"
"github.com/cockroachdb/errors"
"github.com/minio/minio-go/v7"
"github.com/minio/minio-go/v7/pkg/credentials"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/mock"
"github.com/stretchr/testify/require"
"github.com/stretchr/testify/suite"
"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
"github.com/milvus-io/milvus-proto/go-api/v2/msgpb"
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
kvmocks "github.com/milvus-io/milvus/internal/kv/mocks"
"github.com/milvus-io/milvus/internal/metastore"
"github.com/milvus-io/milvus/internal/metastore/kv/datacoord"
catalogmocks "github.com/milvus-io/milvus/internal/metastore/mocks"
"github.com/milvus-io/milvus/internal/metastore/model"
"github.com/milvus-io/milvus/internal/mocks"
"github.com/milvus-io/milvus/internal/proto/datapb"
"github.com/milvus-io/milvus/internal/proto/indexpb"
"github.com/milvus-io/milvus/internal/storage"
"github.com/milvus-io/milvus/pkg/common"
"github.com/milvus-io/milvus/pkg/util/funcutil"
"github.com/milvus-io/milvus/pkg/util/paramtable"
)
func Test_garbageCollector_basic(t *testing.T) {
bucketName := `datacoord-ut` + strings.ToLower(funcutil.RandomString(8))
rootPath := `gc` + funcutil.RandomString(8)
// TODO change to Params
cli, _, _, _, _, err := initUtOSSEnv(bucketName, rootPath, 0)
require.NoError(t, err)
meta, err := newMemoryMeta()
assert.NoError(t, err)
t.Run("normal gc", func(t *testing.T) {
gc := newGarbageCollector(meta, newMockHandler(), GcOption{
cli: cli,
enabled: true,
checkInterval: time.Millisecond * 10,
missingTolerance: time.Hour * 24,
dropTolerance: time.Hour * 24,
})
gc.start()
time.Sleep(time.Millisecond * 20)
assert.NotPanics(t, func() {
gc.close()
})
})
t.Run("with nil cli", func(t *testing.T) {
gc := newGarbageCollector(meta, newMockHandler(), GcOption{
cli: nil,
enabled: true,
checkInterval: time.Millisecond * 10,
missingTolerance: time.Hour * 24,
dropTolerance: time.Hour * 24,
})
assert.NotPanics(t, func() {
gc.start()
})
assert.NotPanics(t, func() {
gc.close()
})
})
}
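// validateMinioPrefixElements asserts that the object keys currently stored
// under the given bucket/prefix exactly match the expected elements.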
func validateMinioPrefixElements(t *testing.T, cli *minio.Client, bucketName string, prefix string, elements []string) {
var current []string
for info := range cli.ListObjects(context.TODO(), bucketName, minio.ListObjectsOptions{Prefix: prefix, Recursive: true}) {
current = append(current, info.Key)
}
assert.ElementsMatch(t, elements, current)
}
func Test_garbageCollector_scan(t *testing.T) {
bucketName := `datacoord-ut` + strings.ToLower(funcutil.RandomString(8))
rootPath := paramtable.Get().MinioCfg.RootPath.GetValue()
// TODO change to Params
cli, inserts, stats, delta, others, err := initUtOSSEnv(bucketName, rootPath, 4)
require.NoError(t, err)
meta, err := newMemoryMeta()
assert.NoError(t, err)
t.Run("key is reference", func(t *testing.T) {
gc := newGarbageCollector(meta, newMockHandler(), GcOption{
cli: cli,
enabled: true,
checkInterval: time.Minute * 30,
missingTolerance: time.Hour * 24,
dropTolerance: time.Hour * 24,
})
gc.scan()
validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, common.SegmentInsertLogPath), inserts)
validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, common.SegmentStatslogPath), stats)
validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, common.SegmentDeltaLogPath), delta)
validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, `indexes`), others)
gc.close()
})
t.Run("missing all but save tolerance", func(t *testing.T) {
gc := newGarbageCollector(meta, newMockHandler(), GcOption{
cli: cli,
enabled: true,
checkInterval: time.Minute * 30,
missingTolerance: time.Hour * 24,
dropTolerance: time.Hour * 24,
})
gc.scan()
validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, common.SegmentInsertLogPath), inserts)
validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, common.SegmentStatslogPath), stats)
validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, common.SegmentDeltaLogPath), delta)
validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, `indexes`), others)
gc.close()
})
t.Run("hit, no gc", func(t *testing.T) {
segment := buildSegment(1, 10, 100, "ch", false)
segment.State = commonpb.SegmentState_Flushed
segment.Binlogs = []*datapb.FieldBinlog{getFieldBinlogPaths(0, inserts[0])}
segment.Statslogs = []*datapb.FieldBinlog{getFieldBinlogPaths(0, stats[0])}
segment.Deltalogs = []*datapb.FieldBinlog{getFieldBinlogPaths(0, delta[0])}
err = meta.AddSegment(context.TODO(), segment)
require.NoError(t, err)
gc := newGarbageCollector(meta, newMockHandler(), GcOption{
cli: cli,
enabled: true,
checkInterval: time.Minute * 30,
missingTolerance: time.Hour * 24,
dropTolerance: time.Hour * 24,
})
gc.start()
gc.scan()
validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, common.SegmentInsertLogPath), inserts)
validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, common.SegmentStatslogPath), stats)
validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, common.SegmentDeltaLogPath), delta)
validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, `indexes`), others)
gc.close()
})
t.Run("dropped gc one", func(t *testing.T) {
segment := buildSegment(1, 10, 100, "ch", false)
segment.State = commonpb.SegmentState_Dropped
segment.DroppedAt = uint64(time.Now().Add(-time.Hour).UnixNano())
segment.Binlogs = []*datapb.FieldBinlog{getFieldBinlogPaths(0, inserts[0])}
segment.Statslogs = []*datapb.FieldBinlog{getFieldBinlogPaths(0, stats[0])}
segment.Deltalogs = []*datapb.FieldBinlog{getFieldBinlogPaths(0, delta[0])}
err = meta.AddSegment(context.TODO(), segment)
require.NoError(t, err)
gc := newGarbageCollector(meta, newMockHandler(), GcOption{
cli: cli,
enabled: true,
checkInterval: time.Minute * 30,
missingTolerance: time.Hour * 24,
dropTolerance: 0,
})
gc.clearEtcd()
validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, common.SegmentInsertLogPath), inserts[1:])
validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, common.SegmentStatslogPath), stats[1:])
validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, common.SegmentDeltaLogPath), delta[1:])
validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, `indexes`), others)
gc.close()
})
t.Run("missing gc all", func(t *testing.T) {
gc := newGarbageCollector(meta, newMockHandler(), GcOption{
cli: cli,
enabled: true,
checkInterval: time.Minute * 30,
missingTolerance: 0,
dropTolerance: 0,
})
gc.start()
gc.scan()
gc.clearEtcd()
// the bad path shall remain, since datacoord cannot determine whether a file is garbage when its path is invalid
validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, common.SegmentInsertLogPath), inserts[1:2])
validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, common.SegmentStatslogPath), stats[1:2])
validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, common.SegmentDeltaLogPath), delta[1:2])
validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, `indexes`), others)
gc.close()
})
t.Run("list object with error", func(t *testing.T) {
gc := newGarbageCollector(meta, newMockHandler(), GcOption{
cli: cli,
enabled: true,
checkInterval: time.Minute * 30,
missingTolerance: 0,
dropTolerance: 0,
})
gc.start()
gc.scan()
// the bad path shall remain, since datacoord cannot determine whether a file is garbage when its path is invalid
validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, common.SegmentInsertLogPath), inserts[1:2])
validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, common.SegmentStatslogPath), stats[1:2])
validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, common.SegmentDeltaLogPath), delta[1:2])
validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, `indexes`), others)
gc.close()
})
cleanupOSS(cli.Client, bucketName, rootPath)
}
// initUtOSSEnv initializes the unit test OSS (MinIO) environment: it creates a
// bucket and writes n objects under each of the insert/stats/delta/index prefixes.
// The objects at index 1 deliberately get invalid paths ("error-seg-id") so tests
// can verify that unparsable keys are never collected as garbage.
func initUtOSSEnv(bucket, root string, n int) (mcm *storage.MinioChunkManager, inserts []string, stats []string, delta []string, other []string, err error) {
paramtable.Init()
cli, err := minio.New(Params.MinioCfg.Address.GetValue(), &minio.Options{
Creds: credentials.NewStaticV4(Params.MinioCfg.AccessKeyID.GetValue(), Params.MinioCfg.SecretAccessKey.GetValue(), ""),
Secure: Params.MinioCfg.UseSSL.GetAsBool(),
})
if err != nil {
return nil, nil, nil, nil, nil, err
}
has, err := cli.BucketExists(context.TODO(), bucket)
if err != nil {
return nil, nil, nil, nil, nil, err
}
if !has {
err = cli.MakeBucket(context.TODO(), bucket, minio.MakeBucketOptions{})
if err != nil {
return nil, nil, nil, nil, nil, err
}
}
inserts = make([]string, 0, n)
stats = make([]string, 0, n)
delta = make([]string, 0, n)
other = make([]string, 0, n)
content := []byte("test")
for i := 0; i < n; i++ {
reader := bytes.NewReader(content)
// collID/partID/segID/fieldID/fileName
// [str]/id/id/string/string
var token string
if i == 1 {
token = path.Join(strconv.Itoa(i), strconv.Itoa(i), "error-seg-id", strconv.Itoa(i), fmt.Sprint(rand.Int63()))
} else {
token = path.Join(strconv.Itoa(1+i), strconv.Itoa(10+i), strconv.Itoa(100+i), strconv.Itoa(i), fmt.Sprint(rand.Int63()))
}
// insert
filePath := path.Join(root, common.SegmentInsertLogPath, token)
info, err := cli.PutObject(context.TODO(), bucket, filePath, reader, int64(len(content)), minio.PutObjectOptions{})
if err != nil {
return nil, nil, nil, nil, nil, err
}
inserts = append(inserts, info.Key)
// stats
filePath = path.Join(root, common.SegmentStatslogPath, token)
info, err = cli.PutObject(context.TODO(), bucket, filePath, reader, int64(len(content)), minio.PutObjectOptions{})
if err != nil {
return nil, nil, nil, nil, nil, err
}
stats = append(stats, info.Key)
// delta
if i == 1 {
token = path.Join(strconv.Itoa(i), strconv.Itoa(i), "error-seg-id", fmt.Sprint(rand.Int63()))
} else {
token = path.Join(strconv.Itoa(1+i), strconv.Itoa(10+i), strconv.Itoa(100+i), fmt.Sprint(rand.Int63()))
}
filePath = path.Join(root, common.SegmentDeltaLogPath, token)
info, err = cli.PutObject(context.TODO(), bucket, filePath, reader, int64(len(content)), minio.PutObjectOptions{})
if err != nil {
return nil, nil, nil, nil, nil, err
}
delta = append(delta, info.Key)
// other
filePath = path.Join(root, `indexes`, token)
info, err = cli.PutObject(context.TODO(), bucket, filePath, reader, int64(len(content)), minio.PutObjectOptions{})
if err != nil {
return nil, nil, nil, nil, nil, err
}
other = append(other, info.Key)
}
mcm = &storage.MinioChunkManager{
Client: cli,
}
mcm.SetVar(bucket, root)
return mcm, inserts, stats, delta, other, nil
}
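// cleanupOSS removes every object under root and then drops the bucket.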
func cleanupOSS(cli *minio.Client, bucket, root string) {
ch := cli.ListObjects(context.TODO(), bucket, minio.ListObjectsOptions{Prefix: root, Recursive: true})
cli.RemoveObjects(context.TODO(), bucket, ch, minio.RemoveObjectsOptions{})
cli.RemoveBucket(context.TODO(), bucket)
}
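// createMetaForRecycleUnusedIndexes builds a meta containing one live index and
// two deleted indexes, giving recycleUnusedIndexes both drop candidates and a survivor.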
func createMetaForRecycleUnusedIndexes(catalog metastore.DataCoordCatalog) *meta {
var (
ctx = context.Background()
collID = UniqueID(100)
// partID = UniqueID(200)
fieldID = UniqueID(300)
indexID = UniqueID(400)
)
return &meta{
RWMutex: sync.RWMutex{},
ctx: ctx,
catalog: catalog,
collections: nil,
segments: nil,
channelCPs: newChannelCps(),
chunkManager: nil,
indexMeta: &indexMeta{
catalog: catalog,
indexes: map[UniqueID]map[UniqueID]*model.Index{
collID: {
indexID: {
TenantID: "",
CollectionID: collID,
FieldID: fieldID,
IndexID: indexID,
IndexName: "_default_idx",
IsDeleted: false,
CreateTime: 10,
TypeParams: nil,
IndexParams: nil,
IsAutoIndex: false,
UserIndexParams: nil,
},
indexID + 1: {
TenantID: "",
CollectionID: collID,
FieldID: fieldID + 1,
IndexID: indexID + 1,
IndexName: "_default_idx_101",
IsDeleted: true,
CreateTime: 0,
TypeParams: nil,
IndexParams: nil,
IsAutoIndex: false,
UserIndexParams: nil,
},
},
collID + 1: {
indexID + 10: {
TenantID: "",
CollectionID: collID + 1,
FieldID: fieldID + 10,
IndexID: indexID + 10,
IndexName: "index",
IsDeleted: true,
CreateTime: 10,
TypeParams: nil,
IndexParams: nil,
IsAutoIndex: false,
UserIndexParams: nil,
},
},
},
buildID2SegmentIndex: nil,
},
}
}
func TestGarbageCollector_recycleUnusedIndexes(t *testing.T) {
t.Run("success", func(t *testing.T) {
catalog := catalogmocks.NewDataCoordCatalog(t)
catalog.On("DropIndex",
mock.Anything,
mock.Anything,
mock.Anything,
).Return(nil)
gc := newGarbageCollector(createMetaForRecycleUnusedIndexes(catalog), nil, GcOption{})
gc.recycleUnusedIndexes()
})
t.Run("fail", func(t *testing.T) {
catalog := catalogmocks.NewDataCoordCatalog(t)
catalog.On("DropIndex",
mock.Anything,
mock.Anything,
mock.Anything,
).Return(errors.New("fail"))
gc := newGarbageCollector(createMetaForRecycleUnusedIndexes(catalog), nil, GcOption{})
gc.recycleUnusedIndexes()
})
}
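// createMetaForRecycleUnusedSegIndexes builds a meta whose segment indexes no
// longer reference any live index (the indexes map is empty), so all of them
// are candidates for recycling.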
func createMetaForRecycleUnusedSegIndexes(catalog metastore.DataCoordCatalog) *meta {
var (
ctx = context.Background()
collID = UniqueID(100)
partID = UniqueID(200)
// fieldID = UniqueID(300)
indexID = UniqueID(400)
segID = UniqueID(500)
)
return &meta{
RWMutex: sync.RWMutex{},
ctx: ctx,
catalog: catalog,
collections: nil,
segments: &SegmentsInfo{
segments: map[UniqueID]*SegmentInfo{
segID: {
SegmentInfo: &datapb.SegmentInfo{
ID: segID,
CollectionID: collID,
PartitionID: partID,
InsertChannel: "",
NumOfRows: 1026,
State: commonpb.SegmentState_Flushed,
},
},
segID + 1: {
SegmentInfo: nil,
},
},
},
indexMeta: &indexMeta{
catalog: catalog,
segmentIndexes: map[UniqueID]map[UniqueID]*model.SegmentIndex{
segID: {
indexID: {
SegmentID: segID,
CollectionID: collID,
PartitionID: partID,
NumRows: 1026,
IndexID: indexID,
BuildID: buildID,
NodeID: 1,
IndexVersion: 1,
IndexState: commonpb.IndexState_Finished,
FailReason: "",
IsDeleted: false,
CreateTime: 10,
IndexFileKeys: []string{"file1", "file2"},
IndexSize: 0,
WriteHandoff: false,
},
},
segID + 1: {
indexID: {
SegmentID: segID + 1,
CollectionID: collID,
PartitionID: partID,
NumRows: 1026,
IndexID: indexID,
BuildID: buildID + 1,
NodeID: 1,
IndexVersion: 1,
IndexState: commonpb.IndexState_Finished,
FailReason: "",
IsDeleted: false,
CreateTime: 10,
IndexFileKeys: []string{"file1", "file2"},
IndexSize: 0,
WriteHandoff: false,
},
},
},
indexes: map[UniqueID]map[UniqueID]*model.Index{},
buildID2SegmentIndex: map[UniqueID]*model.SegmentIndex{
buildID: {
SegmentID: segID,
CollectionID: collID,
PartitionID: partID,
NumRows: 1026,
IndexID: indexID,
BuildID: buildID,
NodeID: 1,
IndexVersion: 1,
IndexState: commonpb.IndexState_Finished,
FailReason: "",
IsDeleted: false,
CreateTime: 10,
IndexFileKeys: []string{"file1", "file2"},
IndexSize: 0,
WriteHandoff: false,
},
buildID + 1: {
SegmentID: segID + 1,
CollectionID: collID,
PartitionID: partID,
NumRows: 1026,
IndexID: indexID,
BuildID: buildID + 1,
NodeID: 1,
IndexVersion: 1,
IndexState: commonpb.IndexState_Finished,
FailReason: "",
IsDeleted: false,
CreateTime: 10,
IndexFileKeys: []string{"file1", "file2"},
IndexSize: 0,
WriteHandoff: false,
},
},
},
channelCPs: nil,
chunkManager: nil,
}
}
func TestGarbageCollector_recycleUnusedSegIndexes(t *testing.T) {
t.Run("success", func(t *testing.T) {
catalog := catalogmocks.NewDataCoordCatalog(t)
catalog.On("DropSegmentIndex",
mock.Anything,
mock.Anything,
mock.Anything,
mock.Anything,
mock.Anything,
).Return(nil)
gc := newGarbageCollector(createMetaForRecycleUnusedSegIndexes(catalog), nil, GcOption{})
gc.recycleUnusedSegIndexes()
})
t.Run("fail", func(t *testing.T) {
catalog := catalogmocks.NewDataCoordCatalog(t)
catalog.On("DropSegmentIndex",
mock.Anything,
mock.Anything,
mock.Anything,
mock.Anything,
mock.Anything,
).Return(errors.New("fail"))
gc := newGarbageCollector(createMetaForRecycleUnusedSegIndexes(catalog), nil, GcOption{})
gc.recycleUnusedSegIndexes()
})
}
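// createMetaTableForRecycleUnusedIndexFiles builds a meta with one finished
// segment index (whose index files are referenced) and one in-progress build
// (whose files are not), so recycleUnusedIndexFiles can distinguish live files
// from garbage.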
func createMetaTableForRecycleUnusedIndexFiles(catalog *datacoord.Catalog) *meta {
var (
ctx = context.Background()
collID = UniqueID(100)
partID = UniqueID(200)
// fieldID = UniqueID(300)
indexID = UniqueID(400)
segID = UniqueID(500)
buildID = UniqueID(600)
)
return &meta{
RWMutex: sync.RWMutex{},
ctx: ctx,
catalog: catalog,
collections: nil,
segments: &SegmentsInfo{
segments: map[UniqueID]*SegmentInfo{
segID: {
SegmentInfo: &datapb.SegmentInfo{
ID: segID,
CollectionID: collID,
PartitionID: partID,
InsertChannel: "",
NumOfRows: 1026,
State: commonpb.SegmentState_Flushed,
},
},
segID + 1: {
SegmentInfo: &datapb.SegmentInfo{
ID: segID + 1,
CollectionID: collID,
PartitionID: partID,
InsertChannel: "",
NumOfRows: 1026,
State: commonpb.SegmentState_Flushed,
},
},
},
},
indexMeta: &indexMeta{
catalog: catalog,
segmentIndexes: map[UniqueID]map[UniqueID]*model.SegmentIndex{
segID: {
indexID: {
SegmentID: segID,
CollectionID: collID,
PartitionID: partID,
NumRows: 1026,
IndexID: indexID,
BuildID: buildID,
NodeID: 1,
IndexVersion: 1,
IndexState: commonpb.IndexState_Finished,
FailReason: "",
IsDeleted: false,
CreateTime: 10,
IndexFileKeys: []string{"file1", "file2"},
IndexSize: 0,
WriteHandoff: false,
},
},
segID + 1: {
indexID: {
SegmentID: segID + 1,
CollectionID: collID,
PartitionID: partID,
NumRows: 1026,
IndexID: indexID,
BuildID: buildID + 1,
NodeID: 1,
IndexVersion: 1,
IndexState: commonpb.IndexState_InProgress,
FailReason: "",
IsDeleted: false,
CreateTime: 10,
IndexFileKeys: nil,
IndexSize: 0,
WriteHandoff: false,
},
},
},
indexes: map[UniqueID]map[UniqueID]*model.Index{
collID: {
indexID: {
TenantID: "",
CollectionID: collID,
FieldID: fieldID,
IndexID: indexID,
IndexName: "_default_idx",
IsDeleted: false,
CreateTime: 10,
TypeParams: nil,
IndexParams: nil,
IsAutoIndex: false,
UserIndexParams: nil,
},
},
},
buildID2SegmentIndex: map[UniqueID]*model.SegmentIndex{
buildID: {
SegmentID: segID,
CollectionID: collID,
PartitionID: partID,
NumRows: 1026,
IndexID: indexID,
BuildID: buildID,
NodeID: 1,
IndexVersion: 1,
IndexState: commonpb.IndexState_Finished,
FailReason: "",
IsDeleted: false,
CreateTime: 10,
IndexFileKeys: []string{"file1", "file2"},
IndexSize: 0,
WriteHandoff: false,
},
buildID + 1: {
SegmentID: segID + 1,
CollectionID: collID,
PartitionID: partID,
NumRows: 1026,
IndexID: indexID,
BuildID: buildID + 1,
NodeID: 1,
IndexVersion: 1,
IndexState: commonpb.IndexState_InProgress,
FailReason: "",
IsDeleted: false,
CreateTime: 10,
IndexFileKeys: nil,
IndexSize: 0,
WriteHandoff: false,
},
},
},
}
}
func TestGarbageCollector_recycleUnusedIndexFiles(t *testing.T) {
t.Run("success", func(t *testing.T) {
cm := &mocks.ChunkManager{}
cm.EXPECT().RootPath().Return("root")
cm.EXPECT().ListWithPrefix(mock.Anything, mock.Anything, mock.Anything).Return([]string{"a/b/c/", "a/b/600/", "a/b/601/", "a/b/602/"}, nil, nil)
cm.EXPECT().RemoveWithPrefix(mock.Anything, mock.Anything).Return(nil)
cm.EXPECT().Remove(mock.Anything, mock.Anything).Return(nil)
gc := newGarbageCollector(
createMetaTableForRecycleUnusedIndexFiles(&datacoord.Catalog{MetaKv: kvmocks.NewMetaKv(t)}),
nil,
GcOption{
cli: cm,
})
gc.recycleUnusedIndexFiles()
})
t.Run("list fail", func(t *testing.T) {
cm := &mocks.ChunkManager{}
cm.EXPECT().RootPath().Return("root")
cm.EXPECT().ListWithPrefix(mock.Anything, mock.Anything, mock.Anything).Return(nil, nil, errors.New("error"))
gc := newGarbageCollector(
createMetaTableForRecycleUnusedIndexFiles(&datacoord.Catalog{MetaKv: kvmocks.NewMetaKv(t)}),
nil,
GcOption{
cli: cm,
})
gc.recycleUnusedIndexFiles()
})
t.Run("remove fail", func(t *testing.T) {
cm := &mocks.ChunkManager{}
cm.EXPECT().RootPath().Return("root")
cm.EXPECT().Remove(mock.Anything, mock.Anything).Return(errors.New("error"))
cm.EXPECT().ListWithPrefix(mock.Anything, mock.Anything, mock.Anything).Return([]string{"a/b/c/", "a/b/600/", "a/b/601/", "a/b/602/"}, nil, nil)
cm.EXPECT().RemoveWithPrefix(mock.Anything, mock.Anything).Return(nil)
gc := newGarbageCollector(
createMetaTableForRecycleUnusedIndexFiles(&datacoord.Catalog{MetaKv: kvmocks.NewMetaKv(t)}),
nil,
GcOption{
cli: cm,
})
gc.recycleUnusedIndexFiles()
})
t.Run("remove with prefix fail", func(t *testing.T) {
cm := &mocks.ChunkManager{}
cm.EXPECT().RootPath().Return("root")
cm.EXPECT().Remove(mock.Anything, mock.Anything).Return(errors.New("error"))
cm.EXPECT().ListWithPrefix(mock.Anything, mock.Anything, mock.Anything).Return([]string{"a/b/c/", "a/b/600/", "a/b/601/", "a/b/602/"}, nil, nil)
cm.EXPECT().RemoveWithPrefix(mock.Anything, mock.Anything).Return(errors.New("error"))
gc := newGarbageCollector(
createMetaTableForRecycleUnusedIndexFiles(&datacoord.Catalog{MetaKv: kvmocks.NewMetaKv(t)}),
nil,
GcOption{
cli: cm,
})
gc.recycleUnusedIndexFiles()
})
}
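// TestGarbageCollector_clearETCD verifies GC ordering across a two-level
// compaction tree, plus compacted segments with and without child info;
// see the scenario diagram inside the test body.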
func TestGarbageCollector_clearETCD(t *testing.T) {
catalog := catalogmocks.NewDataCoordCatalog(t)
catalog.On("ChannelExists",
mock.Anything,
mock.Anything,
).Return(true)
catalog.On("DropChannelCheckpoint",
mock.Anything,
mock.Anything,
).Return(nil).Maybe()
catalog.On("CreateSegmentIndex",
mock.Anything,
mock.Anything,
).Return(nil)
catalog.On("AlterSegmentIndexes",
mock.Anything,
mock.Anything,
).Return(nil)
catalog.On("DropSegment",
mock.Anything,
mock.Anything,
).Return(nil)
channelCPs := newChannelCps()
channelCPs.checkpoints["dmlChannel"] = &msgpb.MsgPosition{
Timestamp: 1000,
}
m := &meta{
catalog: catalog,
channelCPs: channelCPs,
segments: &SegmentsInfo{
compactionTo: make(map[int64]int64),
segments: map[UniqueID]*SegmentInfo{
segID: {
SegmentInfo: &datapb.SegmentInfo{
ID: segID,
CollectionID: collID,
PartitionID: partID,
InsertChannel: "dmlChannel",
NumOfRows: 5000,
State: commonpb.SegmentState_Dropped,
MaxRowNum: 65536,
DroppedAt: 0,
DmlPosition: &msgpb.MsgPosition{
Timestamp: 900,
},
Binlogs: []*datapb.FieldBinlog{
{
FieldID: 1,
Binlogs: []*datapb.Binlog{
{
LogPath: "log1",
LogSize: 1024,
},
},
},
{
FieldID: 2,
Binlogs: []*datapb.Binlog{
{
LogPath: "log2",
LogSize: 1024,
},
},
},
},
Deltalogs: []*datapb.FieldBinlog{
{
FieldID: 1,
Binlogs: []*datapb.Binlog{
{
LogPath: "del_log1",
LogSize: 1024,
},
},
},
{
FieldID: 2,
Binlogs: []*datapb.Binlog{
{
LogPath: "del_log2",
LogSize: 1024,
},
},
},
},
Statslogs: []*datapb.FieldBinlog{
{
FieldID: 1,
Binlogs: []*datapb.Binlog{
{
LogPath: "stats_log1",
LogSize: 1024,
},
},
},
},
},
},
segID + 1: {
SegmentInfo: &datapb.SegmentInfo{
ID: segID + 1,
CollectionID: collID,
PartitionID: partID,
InsertChannel: "dmlChannel",
NumOfRows: 5000,
State: commonpb.SegmentState_Dropped,
MaxRowNum: 65536,
DroppedAt: 0,
DmlPosition: &msgpb.MsgPosition{
Timestamp: 900,
},
},
},
segID + 2: {
SegmentInfo: &datapb.SegmentInfo{
ID: segID + 2,
CollectionID: collID,
PartitionID: partID,
InsertChannel: "dmlChannel",
NumOfRows: 10000,
State: commonpb.SegmentState_Dropped,
MaxRowNum: 65536,
DroppedAt: 10,
DmlPosition: &msgpb.MsgPosition{
Timestamp: 900,
},
CompactionFrom: []int64{segID, segID + 1},
},
},
segID + 3: {
SegmentInfo: &datapb.SegmentInfo{
ID: segID + 3,
CollectionID: collID,
PartitionID: partID,
InsertChannel: "dmlChannel",
NumOfRows: 2000,
State: commonpb.SegmentState_Dropped,
MaxRowNum: 65536,
DroppedAt: 10,
DmlPosition: &msgpb.MsgPosition{
Timestamp: 900,
},
CompactionFrom: nil,
},
},
segID + 4: {
SegmentInfo: &datapb.SegmentInfo{
ID: segID + 4,
CollectionID: collID,
PartitionID: partID,
InsertChannel: "dmlChannel",
NumOfRows: 12000,
State: commonpb.SegmentState_Flushed,
MaxRowNum: 65536,
DroppedAt: 10,
DmlPosition: &msgpb.MsgPosition{
Timestamp: 900,
},
CompactionFrom: []int64{segID + 2, segID + 3},
},
},
segID + 5: {
SegmentInfo: &datapb.SegmentInfo{
ID: segID + 5,
CollectionID: collID,
PartitionID: partID,
InsertChannel: "dmlChannel",
NumOfRows: 2000,
State: commonpb.SegmentState_Dropped,
MaxRowNum: 65535,
DroppedAt: 0,
CompactionFrom: nil,
DmlPosition: &msgpb.MsgPosition{
Timestamp: 1200,
},
},
},
segID + 6: {
SegmentInfo: &datapb.SegmentInfo{
ID: segID + 6,
CollectionID: collID,
PartitionID: partID,
InsertChannel: "dmlChannel",
NumOfRows: 2000,
State: commonpb.SegmentState_Dropped,
MaxRowNum: 65535,
DroppedAt: uint64(time.Now().Add(time.Hour).UnixNano()),
CompactionFrom: nil,
DmlPosition: &msgpb.MsgPosition{
Timestamp: 900,
},
Compacted: true,
},
},
// compacted and child is GCed, dml pos is bigger than channel cp
segID + 7: {
SegmentInfo: &datapb.SegmentInfo{
ID: segID + 7,
CollectionID: collID,
PartitionID: partID,
InsertChannel: "dmlChannel",
NumOfRows: 2000,
State: commonpb.SegmentState_Dropped,
MaxRowNum: 65535,
DroppedAt: 0,
CompactionFrom: nil,
DmlPosition: &msgpb.MsgPosition{
Timestamp: 1200,
},
Compacted: true,
},
},
},
},
indexMeta: &indexMeta{
catalog: catalog,
segmentIndexes: map[UniqueID]map[UniqueID]*model.SegmentIndex{
segID: {
indexID: {
SegmentID: segID,
CollectionID: collID,
PartitionID: partID,
NumRows: 5000,
IndexID: indexID,
BuildID: buildID,
NodeID: 0,
IndexVersion: 1,
IndexState: commonpb.IndexState_Finished,
FailReason: "",
IsDeleted: false,
CreateTime: 0,
IndexFileKeys: []string{"file1", "file2"},
IndexSize: 1024,
WriteHandoff: false,
},
},
segID + 1: {
indexID: {
SegmentID: segID + 1,
CollectionID: collID,
PartitionID: partID,
NumRows: 5000,
IndexID: indexID,
BuildID: buildID + 1,
NodeID: 0,
IndexVersion: 1,
IndexState: commonpb.IndexState_Finished,
FailReason: "",
IsDeleted: false,
CreateTime: 0,
IndexFileKeys: []string{"file3", "file4"},
IndexSize: 1024,
WriteHandoff: false,
},
},
},
buildID2SegmentIndex: map[UniqueID]*model.SegmentIndex{
buildID: {
SegmentID: segID,
CollectionID: collID,
PartitionID: partID,
NumRows: 5000,
IndexID: indexID,
BuildID: buildID,
NodeID: 0,
IndexVersion: 1,
IndexState: commonpb.IndexState_Finished,
FailReason: "",
IsDeleted: false,
CreateTime: 0,
IndexFileKeys: []string{"file1", "file2"},
IndexSize: 1024,
WriteHandoff: false,
},
buildID + 1: {
SegmentID: segID + 1,
CollectionID: collID,
PartitionID: partID,
NumRows: 5000,
IndexID: indexID,
BuildID: buildID + 1,
NodeID: 0,
IndexVersion: 1,
IndexState: commonpb.IndexState_Finished,
FailReason: "",
IsDeleted: false,
CreateTime: 0,
IndexFileKeys: []string{"file3", "file4"},
IndexSize: 1024,
WriteHandoff: false,
},
},
indexes: map[UniqueID]map[UniqueID]*model.Index{
collID: {
indexID: {
TenantID: "",
CollectionID: collID,
FieldID: fieldID,
IndexID: indexID,
IndexName: indexName,
IsDeleted: false,
CreateTime: 0,
TypeParams: nil,
IndexParams: nil,
IsAutoIndex: false,
UserIndexParams: nil,
},
},
},
},
collections: map[UniqueID]*collectionInfo{
collID: {
ID: collID,
Schema: &schemapb.CollectionSchema{
Name: "",
Description: "",
AutoID: false,
Fields: []*schemapb.FieldSchema{
{
FieldID: fieldID,
Name: "",
IsPrimaryKey: false,
Description: "",
DataType: schemapb.DataType_FloatVector,
TypeParams: nil,
IndexParams: nil,
AutoID: false,
State: 0,
},
},
},
Partitions: nil,
StartPositions: nil,
Properties: nil,
},
},
}
for segID, segment := range map[UniqueID]*SegmentInfo{
segID: {
SegmentInfo: &datapb.SegmentInfo{
ID: segID,
CollectionID: collID,
PartitionID: partID,
InsertChannel: "dmlChannel",
NumOfRows: 5000,
State: commonpb.SegmentState_Dropped,
MaxRowNum: 65536,
DroppedAt: 0,
DmlPosition: &msgpb.MsgPosition{
Timestamp: 900,
},
Binlogs: []*datapb.FieldBinlog{
{
FieldID: 1,
Binlogs: []*datapb.Binlog{
{
LogPath: "log1",
LogSize: 1024,
},
},
},
{
FieldID: 2,
Binlogs: []*datapb.Binlog{
{
LogPath: "log2",
LogSize: 1024,
},
},
},
},
Deltalogs: []*datapb.FieldBinlog{
{
FieldID: 1,
Binlogs: []*datapb.Binlog{
{
LogPath: "del_log1",
LogSize: 1024,
},
},
},
{
FieldID: 2,
Binlogs: []*datapb.Binlog{
{
LogPath: "del_log2",
LogSize: 1024,
},
},
},
},
Statslogs: []*datapb.FieldBinlog{
{
FieldID: 1,
Binlogs: []*datapb.Binlog{
{
LogPath: "stats_log1",
LogSize: 1024,
},
},
},
},
},
},
segID + 1: {
SegmentInfo: &datapb.SegmentInfo{
ID: segID + 1,
CollectionID: collID,
PartitionID: partID,
InsertChannel: "dmlChannel",
NumOfRows: 5000,
State: commonpb.SegmentState_Dropped,
MaxRowNum: 65536,
DroppedAt: 0,
DmlPosition: &msgpb.MsgPosition{
Timestamp: 900,
},
},
},
segID + 2: {
SegmentInfo: &datapb.SegmentInfo{
ID: segID + 2,
CollectionID: collID,
PartitionID: partID,
InsertChannel: "dmlChannel",
NumOfRows: 10000,
State: commonpb.SegmentState_Dropped,
MaxRowNum: 65536,
DroppedAt: 10,
DmlPosition: &msgpb.MsgPosition{
Timestamp: 900,
},
CompactionFrom: []int64{segID, segID + 1},
},
},
segID + 3: {
SegmentInfo: &datapb.SegmentInfo{
ID: segID + 3,
CollectionID: collID,
PartitionID: partID,
InsertChannel: "dmlChannel",
NumOfRows: 2000,
State: commonpb.SegmentState_Dropped,
MaxRowNum: 65536,
DroppedAt: 10,
DmlPosition: &msgpb.MsgPosition{
Timestamp: 900,
},
CompactionFrom: nil,
},
},
segID + 4: {
SegmentInfo: &datapb.SegmentInfo{
ID: segID + 4,
CollectionID: collID,
PartitionID: partID,
InsertChannel: "dmlChannel",
NumOfRows: 12000,
State: commonpb.SegmentState_Flushed,
MaxRowNum: 65536,
DroppedAt: 10,
DmlPosition: &msgpb.MsgPosition{
Timestamp: 900,
},
CompactionFrom: []int64{segID + 2, segID + 3},
},
},
segID + 5: {
SegmentInfo: &datapb.SegmentInfo{
ID: segID + 5,
CollectionID: collID,
PartitionID: partID,
InsertChannel: "dmlChannel",
NumOfRows: 2000,
State: commonpb.SegmentState_Dropped,
MaxRowNum: 65535,
DroppedAt: 0,
CompactionFrom: nil,
DmlPosition: &msgpb.MsgPosition{
Timestamp: 1200,
},
},
},
segID + 6: {
SegmentInfo: &datapb.SegmentInfo{
ID: segID + 6,
CollectionID: collID,
PartitionID: partID,
InsertChannel: "dmlChannel",
NumOfRows: 2000,
State: commonpb.SegmentState_Dropped,
MaxRowNum: 65535,
DroppedAt: uint64(time.Now().Add(time.Hour).UnixNano()),
CompactionFrom: nil,
DmlPosition: &msgpb.MsgPosition{
Timestamp: 900,
},
Compacted: true,
},
},
// compacted and child is GCed, dml pos is bigger than channel cp
segID + 7: {
SegmentInfo: &datapb.SegmentInfo{
ID: segID + 7,
CollectionID: collID,
PartitionID: partID,
InsertChannel: "dmlChannel",
NumOfRows: 2000,
State: commonpb.SegmentState_Dropped,
MaxRowNum: 65535,
DroppedAt: 0,
CompactionFrom: nil,
DmlPosition: &msgpb.MsgPosition{
Timestamp: 1200,
},
Compacted: true,
},
},
} {
m.segments.SetSegment(segID, segment)
}
cm := &mocks.ChunkManager{}
cm.EXPECT().Remove(mock.Anything, mock.Anything).Return(nil)
gc := newGarbageCollector(
m,
newMockHandlerWithMeta(m),
GcOption{
cli: cm,
dropTolerance: 1,
})
gc.clearEtcd()
/*
A B
\ /
C D
\ /
E
E: flushed, not indexed, should not be GCed
D: dropped, not indexed, should not be GCed, since E is not GCed
C: dropped, not indexed, should not be GCed, since E is not GCed
A: dropped, indexed, should not be GCed, since C is not indexed
B: dropped, indexed, should not be GCed, since C is not indexed
F: dropped, compacted is false, should not be GCed, since its dml position is larger than the channel cp
G: dropped, compacted is true, missing child info, should be GCed since its dml pos is less than the channel cp; FAST GC does not wait for drop tolerance
H: dropped, compacted is true, missing child info, should not be GCed since its dml pos is larger than the channel cp
conclusion: only G is GCed.
*/
segA := gc.meta.GetSegment(segID)
assert.NotNil(t, segA)
segB := gc.meta.GetSegment(segID + 1)
assert.NotNil(t, segB)
segC := gc.meta.GetSegment(segID + 2)
assert.NotNil(t, segC)
segD := gc.meta.GetSegment(segID + 3)
assert.NotNil(t, segD)
segE := gc.meta.GetSegment(segID + 4)
assert.NotNil(t, segE)
segF := gc.meta.GetSegment(segID + 5)
assert.NotNil(t, segF)
segG := gc.meta.GetSegment(segID + 6)
assert.Nil(t, segG)
segH := gc.meta.GetSegment(segID + 7)
assert.NotNil(t, segH)
err := gc.meta.indexMeta.AddSegmentIndex(&model.SegmentIndex{
SegmentID: segID + 4,
CollectionID: collID,
PartitionID: partID,
NumRows: 12000,
IndexID: indexID,
BuildID: buildID + 4,
})
assert.NoError(t, err)
err = gc.meta.indexMeta.FinishTask(&indexpb.IndexTaskInfo{
BuildID: buildID + 4,
State: commonpb.IndexState_Finished,
IndexFileKeys: []string{"file1", "file2", "file3", "file4"},
SerializedSize: 10240,
FailReason: "",
})
assert.NoError(t, err)
gc.clearEtcd()
/*
A: processed before C; C is not yet GCed and not indexed, so A is not GCed in this round
B: processed before C; C is not yet GCed and not indexed, so B is not GCed in this round
E: flushed, indexed, should not be GCed
C: dropped, not indexed, should be GCed since E is indexed
D: dropped, not indexed, should be GCed since E is indexed
*/
segC = gc.meta.GetSegment(segID + 2)
assert.Nil(t, segC)
segD = gc.meta.GetSegment(segID + 3)
assert.Nil(t, segD)
gc.clearEtcd()
/*
A: compacted becomes false since C has already been GCed; A should be GCed since dropTolerance is met
B: compacted becomes false since C has already been GCed; B should be GCed since dropTolerance is met
*/
segA = gc.meta.GetSegment(segID)
assert.Nil(t, segA)
segB = gc.meta.GetSegment(segID + 1)
assert.Nil(t, segB)
}
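// TestGarbageCollector_removelogs checks that removeLogs treats missing
// objects (NoSuchKey) as success, while server errors and other errors fail.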
func TestGarbageCollector_removelogs(t *testing.T) {
paramtable.Init()
cm := &mocks.ChunkManager{}
gc := newGarbageCollector(
nil,
nil,
GcOption{
cli: cm,
dropTolerance: 1,
})
var logs []*datapb.Binlog
for i := 0; i < 50; i++ {
logs = append(logs, &datapb.Binlog{
LogPath: "log" + strconv.Itoa(i),
})
}
t.Run("success", func(t *testing.T) {
call := cm.EXPECT().Remove(mock.Anything, mock.Anything).Return(nil)
defer call.Unset()
b := gc.removeLogs(logs)
assert.True(t, b)
})
t.Run("minio not found error", func(t *testing.T) {
call := cm.EXPECT().Remove(mock.Anything, mock.Anything).Return(minio.ErrorResponse{
Code: "NoSuchKey",
})
defer call.Unset()
b := gc.removeLogs(logs)
assert.True(t, b)
})
t.Run("minio server error", func(t *testing.T) {
call := cm.EXPECT().Remove(mock.Anything, mock.Anything).Return(minio.ErrorResponse{
Code: "Server Error",
})
defer call.Unset()
b := gc.removeLogs(logs)
assert.False(t, b)
})
t.Run("other type error", func(t *testing.T) {
call := cm.EXPECT().Remove(mock.Anything, mock.Anything).Return(errors.New("other error"))
defer call.Unset()
b := gc.removeLogs(logs)
assert.False(t, b)
})
}
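// GarbageCollectorSuite exercises garbage collector pause/resume behavior
// against a MinIO-backed test environment.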
type GarbageCollectorSuite struct {
suite.Suite
bucketName string
rootPath string
cli *storage.MinioChunkManager
inserts []string
stats []string
delta []string
others []string
meta *meta
}
func (s *GarbageCollectorSuite) SetupTest() {
s.bucketName = `datacoord-ut` + strings.ToLower(funcutil.RandomString(8))
s.rootPath = `gc` + funcutil.RandomString(8)
var err error
s.cli, s.inserts, s.stats, s.delta, s.others, err = initUtOSSEnv(s.bucketName, s.rootPath, 4)
s.Require().NoError(err)
s.meta, err = newMemoryMeta()
s.Require().NoError(err)
}
func (s *GarbageCollectorSuite) TearDownTest() {
cleanupOSS(s.cli.Client, s.bucketName, s.rootPath)
}
func (s *GarbageCollectorSuite) TestPauseResume() {
s.Run("not_enabled", func() {
gc := newGarbageCollector(s.meta, newMockHandler(), GcOption{
cli: s.cli,
enabled: false,
checkInterval: time.Millisecond * 10,
missingTolerance: time.Hour * 24,
dropTolerance: time.Hour * 24,
})
gc.start()
defer gc.close()
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
err := gc.Pause(ctx, time.Second)
s.NoError(err)
err = gc.Resume(ctx)
s.Error(err)
})
s.Run("pause_then_resume", func() {
gc := newGarbageCollector(s.meta, newMockHandler(), GcOption{
cli: s.cli,
enabled: true,
checkInterval: time.Millisecond * 10,
missingTolerance: time.Hour * 24,
dropTolerance: time.Hour * 24,
})
gc.start()
defer gc.close()
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
err := gc.Pause(ctx, time.Minute)
s.NoError(err)
s.NotZero(gc.pauseUntil.Load())
err = gc.Resume(ctx)
s.NoError(err)
s.Zero(gc.pauseUntil.Load())
})
s.Run("pause_before_until", func() {
gc := newGarbageCollector(s.meta, newMockHandler(), GcOption{
cli: s.cli,
enabled: true,
checkInterval: time.Millisecond * 10,
missingTolerance: time.Hour * 24,
dropTolerance: time.Hour * 24,
})
gc.start()
defer gc.close()
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
err := gc.Pause(ctx, time.Minute)
s.NoError(err)
until := gc.pauseUntil.Load()
s.NotZero(until)
err = gc.Pause(ctx, time.Second)
s.NoError(err)
second := gc.pauseUntil.Load()
s.Equal(until, second)
})
s.Run("pause_resume_timeout", func() {
gc := newGarbageCollector(s.meta, newMockHandler(), GcOption{
cli: s.cli,
enabled: true,
checkInterval: time.Millisecond * 10,
missingTolerance: time.Hour * 24,
dropTolerance: time.Hour * 24,
})
ctx, cancel := context.WithTimeout(context.Background(), time.Millisecond)
defer cancel()
err := gc.Pause(ctx, time.Minute)
s.Error(err)
s.Zero(gc.pauseUntil.Load())
err = gc.Resume(ctx)
s.Error(err)
s.Zero(gc.pauseUntil.Load())
})
}
func TestGarbageCollector(t *testing.T) {
suite.Run(t, new(GarbageCollectorSuite))
}