enhance: use WalkWithPrefix api for oss, enable pipelined file gc (#31740)
issue: #19095, #29655, #31718

- Change `ListWithPrefix` to `WalkWithPrefix` on OSS, so listing runs as a pipeline instead of buffering every key.
- File garbage collection is performed in separate goroutines.
- Segment index recycling now cleans up index files as well.

---------

Signed-off-by: chyezh <chyezh@outlook.com>
This commit is contained in:
parent f06509bf97
commit 2586c2f1b3
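The heart of the change is replacing a buffer-everything ListWithPrefix with a callback-driven WalkWithPrefix, so the garbage collector can inspect and delete objects while the listing is still streaming in. The short Go sketch below only illustrates that difference; fakeStore, ObjectInfo and the function signatures are invented for the illustration and are not the real storage.ChunkManager API shown in the diff further down.

// Minimal sketch of the pipelined listing pattern this commit adopts.
// Names here (fakeStore, ObjectInfo, walkWithPrefix) are illustrative only.
package main

import (
	"fmt"
	"strings"
)

type ObjectInfo struct{ Path string }

// fakeStore stands in for an object-storage bucket.
var fakeStore = []string{
	"files/insert_log/1/1/100/1", "files/insert_log/1/1/100/2",
	"files/stats_log/1/1/100/1", "files/delta_log/1/1/100/1",
}

// listWithPrefix returns every matching key at once: the caller cannot start
// working (or deleting) until the whole listing has been buffered in memory.
func listWithPrefix(prefix string) []ObjectInfo {
	out := make([]ObjectInfo, 0)
	for _, k := range fakeStore {
		if strings.HasPrefix(k, prefix) {
			out = append(out, ObjectInfo{Path: k})
		}
	}
	return out
}

// walkWithPrefix streams keys to a callback as they are discovered, so the
// garbage collector can check and remove each object while the walk is still
// running; returning false stops the walk early.
func walkWithPrefix(prefix string, walk func(ObjectInfo) bool) error {
	for _, k := range fakeStore {
		if strings.HasPrefix(k, prefix) {
			if !walk(ObjectInfo{Path: k}) {
				return nil
			}
		}
	}
	return nil
}

func main() {
	for _, o := range listWithPrefix("files/insert_log/") {
		fmt.Println("listed:", o.Path)
	}
	_ = walkWithPrefix("files/", func(o ObjectInfo) bool {
		fmt.Println("walked:", o.Path) // GC would decide keep/remove per object here
		return true
	})
}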
@@ -102,6 +102,7 @@ minio:
region: # Specify minio storage system location region
useVirtualHost: false # Whether use virtual host mode for bucket
requestTimeoutMs: 10000 # minio timeout for request time in milliseconds
listObjectsMaxKeys: 0 # The maximum number of objects requested per batch in minio ListObjects rpc, 0 means using the oss client default; decrease this configuration if ListObjects times out

# Milvus supports four MQ: rocksmq(based on RocksDB), natsmq(embedded nats-server), Pulsar and Kafka.
# You can change your mq by setting mq.type field.
@@ -443,7 +444,7 @@ dataCoord:
enableGarbageCollection: true
gc:
interval: 3600 # gc interval in seconds
missingTolerance: 3600 # file meta missing tolerance duration in seconds, default to 1hr
missingTolerance: 86400 # file meta missing tolerance duration in seconds, default to 24hr(1d)
dropTolerance: 10800 # file belongs to dropped entity tolerance duration in seconds. 3600
removeConcurrent: 32 # number of concurrent goroutines to remove dropped s3 objects
scanInterval: 168 # garbage collection scan residue interval in hours
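For orientation, the dataCoord garbage-collection block that results from this hunk would look roughly like the following; the nesting and surrounding keys are assumed from the defaults above, so treat it as a sketch rather than the authoritative milvus.yaml.

dataCoord:
  enableGarbageCollection: true
  gc:
    interval: 3600          # gc interval in seconds
    missingTolerance: 86400 # file meta missing tolerance in seconds, raised from 3600 (1h) to 86400 (24h) by this commit
    dropTolerance: 10800    # tolerance for files that belong to dropped entities, in seconds
    removeConcurrent: 32    # number of concurrent goroutines removing dropped s3 objects
    scanInterval: 168       # garbage collection residue scan interval, in hours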
@@ -20,18 +20,17 @@ import (
"context"
"fmt"
"path"
"sort"
"strings"
"sync"
"time"

"github.com/minio/minio-go/v7"
"github.com/cockroachdb/errors"
"github.com/samber/lo"
"go.uber.org/atomic"
"go.uber.org/zap"

"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
"github.com/milvus-io/milvus/internal/metastore/kv/binlog"
"github.com/milvus-io/milvus/internal/metastore/model"
"github.com/milvus-io/milvus/internal/proto/datapb"
"github.com/milvus-io/milvus/internal/storage"
"github.com/milvus-io/milvus/pkg/common"
@@ -53,12 +52,15 @@ type GcOption struct {
dropTolerance time.Duration // dropped segment related key tolerance time
scanInterval time.Duration // interval for scanning residue of interrupted log writes

removeLogPool *conc.Pool[struct{}]
removeObjectPool *conc.Pool[struct{}]
}

// garbageCollector handles garbage files in object storage
// which could be dropped collection remnants or data node failure traces
type garbageCollector struct {
ctx context.Context
cancel context.CancelFunc

option GcOption
meta *meta
handler Handler
@@ -66,7 +68,6 @@ type garbageCollector struct {
startOnce sync.Once
stopOnce sync.Once
wg sync.WaitGroup
closeCh chan struct{}
cmdCh chan gcCmd
pauseUntil atomic.Time
}
@@ -84,12 +85,14 @@ func newGarbageCollector(meta *meta, handler Handler, opt GcOption) *garbageColl
zap.Duration("scanInterval", opt.scanInterval),
zap.Duration("missingTolerance", opt.missingTolerance),
zap.Duration("dropTolerance", opt.dropTolerance))
opt.removeLogPool = conc.NewPool[struct{}](Params.DataCoordCfg.GCRemoveConcurrent.GetAsInt(), conc.WithExpiryDuration(time.Minute))
opt.removeObjectPool = conc.NewPool[struct{}](Params.DataCoordCfg.GCRemoveConcurrent.GetAsInt(), conc.WithExpiryDuration(time.Minute))
ctx, cancel := context.WithCancel(context.Background())
return &garbageCollector{
ctx: ctx,
cancel: cancel,
meta: meta,
handler: handler,
option: opt,
closeCh: make(chan struct{}),
cmdCh: make(chan gcCmd),
}
}
@@ -102,8 +105,7 @@ func (gc *garbageCollector) start() {
return
}
gc.startOnce.Do(func() {
gc.wg.Add(1)
go gc.work()
gc.work(gc.ctx)
})
}
}
@@ -146,26 +148,35 @@ func (gc *garbageCollector) Resume(ctx context.Context) error {
}

// work contains actual looping check logic
func (gc *garbageCollector) work() {
func (gc *garbageCollector) work(ctx context.Context) {
// TODO: fast cancel for gc when closing.
// Run gc tasks in parallel.
gc.wg.Add(3)
go func() {
defer gc.wg.Done()
ticker := time.NewTicker(gc.option.checkInterval)
defer ticker.Stop()
scanTicker := time.NewTicker(gc.option.scanInterval)
defer scanTicker.Stop()
gc.runRecycleTaskWithPauser(ctx, "meta", gc.option.checkInterval, func(ctx context.Context) {
gc.recycleDroppedSegments(ctx)
gc.recycleUnusedIndexes(ctx)
gc.recycleUnusedSegIndexes(ctx)
})
}()
go func() {
defer gc.wg.Done()
gc.runRecycleTaskWithPauser(ctx, "orphan", gc.option.scanInterval, func(ctx context.Context) {
gc.recycleUnusedBinlogFiles(ctx)
gc.recycleUnusedIndexFiles(ctx)
})
}()
go func() {
defer gc.wg.Done()
gc.startControlLoop(ctx)
}()
}

// startControlLoop start a control loop for garbageCollector.
func (gc *garbageCollector) startControlLoop(ctx context.Context) {
for {
select {
case <-ticker.C:
if time.Now().Before(gc.pauseUntil.Load()) {
log.Info("garbage collector paused", zap.Time("until", gc.pauseUntil.Load()))
continue
}
gc.clearEtcd()
gc.recycleUnusedIndexes()
gc.recycleUnusedSegIndexes()
gc.recycleUnusedIndexFiles()
case <-scanTicker.C:
log.Info("Garbage collector start to scan interrupted write residue")
gc.scan()
case cmd := <-gc.cmdCh:
switch cmd.cmdType {
case datapb.GcCommand_Pause:
@@ -182,112 +193,175 @@ func (gc *garbageCollector) work() {
log.Info("garbage collection resumed")
}
close(cmd.done)
case <-gc.closeCh:
log.Warn("garbage collector quit")
case <-gc.ctx.Done():
log.Warn("garbage collector control loop quit")
return
}
}
}

// runRecycleTaskWithPauser is a helper function to create a task with pauser
func (gc *garbageCollector) runRecycleTaskWithPauser(ctx context.Context, name string, interval time.Duration, task func(ctx context.Context)) {
logger := log.With(zap.String("gcType", name)).With(zap.Duration("interval", interval))
timer := time.NewTimer(interval)
defer timer.Stop()

for {
select {
case <-ctx.Done():
return
case <-timer.C:
if time.Now().Before(gc.pauseUntil.Load()) {
logger.Info("garbage collector paused", zap.Time("until", gc.pauseUntil.Load()))
continue
}
logger.Info("garbage collector recycle task start...")
start := time.Now()
task(ctx)
logger.Info("garbage collector recycle task done", zap.Duration("timeCost", time.Since(start)))
}
}
}

// close stop the garbage collector.
func (gc *garbageCollector) close() {
gc.stopOnce.Do(func() {
close(gc.closeCh)
gc.cancel()
gc.wg.Wait()
})
}

// scan load meta file info and compares OSS keys
// recycleUnusedBinlogFiles load meta file info and compares OSS keys
// if missing found, performs gc cleanup
func (gc *garbageCollector) scan() {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
func (gc *garbageCollector) recycleUnusedBinlogFiles(ctx context.Context) {
start := time.Now()
log := log.With(zap.String("gcName", "recycleUnusedBinlogFiles"), zap.Time("startAt", start))
log.Info("start recycleUnusedBinlogFiles...")
defer func() { log.Info("recycleUnusedBinlogFiles done", zap.Duration("timeCost", time.Since(start))) }()

var (
total = 0
valid = 0
missing = 0
)
getMetaMap := func() (typeutil.UniqueSet, typeutil.Set[string]) {
segmentMap := typeutil.NewUniqueSet()
filesMap := typeutil.NewSet[string]()
segments := gc.meta.GetAllSegmentsUnsafe()
for _, segment := range segments {
cloned := segment.Clone()
binlog.DecompressBinLogs(cloned.SegmentInfo)
segmentMap.Insert(segment.GetID())
for _, log := range getLogs(cloned) {
filesMap.Insert(log.GetLogPath())
type scanTask struct {
prefix string
checker func(objectInfo *storage.ChunkObjectInfo, segment *SegmentInfo) bool
label string
}
}
return segmentMap, filesMap
}

// walk only data cluster related prefixes
prefixes := make([]string, 0, 3)
prefixes = append(prefixes, path.Join(gc.option.cli.RootPath(), common.SegmentInsertLogPath))
prefixes = append(prefixes, path.Join(gc.option.cli.RootPath(), common.SegmentStatslogPath))
prefixes = append(prefixes, path.Join(gc.option.cli.RootPath(), common.SegmentDeltaLogPath))
labels := []string{metrics.InsertFileLabel, metrics.StatFileLabel, metrics.DeleteFileLabel}
var removedKeys []string

for idx, prefix := range prefixes {
startTs := time.Now()
infoKeys, modTimes, err := gc.option.cli.ListWithPrefix(ctx, prefix, true)
scanTasks := []scanTask{
{
prefix: path.Join(gc.option.cli.RootPath(), common.SegmentInsertLogPath),
checker: func(objectInfo *storage.ChunkObjectInfo, segment *SegmentInfo) bool {
return segment != nil
},
label: metrics.InsertFileLabel,
},
{
prefix: path.Join(gc.option.cli.RootPath(), common.SegmentStatslogPath),
checker: func(objectInfo *storage.ChunkObjectInfo, segment *SegmentInfo) bool {
logID, err := binlog.GetLogIDFromBingLogPath(objectInfo.FilePath)
if err != nil {
log.Error("failed to list files with prefix",
zap.String("prefix", prefix),
zap.Error(err),
)
log.Warn("garbageCollector find dirty stats log", zap.String("filePath", objectInfo.FilePath), zap.Error(err))
return false
}
cost := time.Since(startTs)
segmentMap, filesMap := getMetaMap()
metrics.GarbageCollectorListLatency.
WithLabelValues(fmt.Sprint(paramtable.GetNodeID()), labels[idx]).
Observe(float64(cost.Milliseconds()))
log.Info("gc scan finish list object", zap.String("prefix", prefix), zap.Duration("time spent", cost), zap.Int("keys", len(infoKeys)))
for i, infoKey := range infoKeys {
total++
_, has := filesMap[infoKey]
if has {
valid++
continue
}

segmentID, err := storage.ParseSegmentIDByBinlog(gc.option.cli.RootPath(), infoKey)
return segment != nil && segment.IsStatsLogExists(logID)
},
label: metrics.StatFileLabel,
},
{
prefix: path.Join(gc.option.cli.RootPath(), common.SegmentDeltaLogPath),
checker: func(objectInfo *storage.ChunkObjectInfo, segment *SegmentInfo) bool {
logID, err := binlog.GetLogIDFromBingLogPath(objectInfo.FilePath)
if err != nil {
missing++
log.Warn("parse segment id error",
zap.String("infoKey", infoKey),
zap.Error(err))
continue
log.Warn("garbageCollector find dirty delta log", zap.String("filePath", objectInfo.FilePath), zap.Error(err))
return false
}
return segment != nil && segment.IsDeltaLogExists(logID)
},
label: metrics.DeleteFileLabel,
},
}

if strings.Contains(prefix, common.SegmentInsertLogPath) &&
segmentMap.Contain(segmentID) {
valid++
continue
}

// not found in meta, check last modified time exceeds tolerance duration
if time.Since(modTimes[i]) > gc.option.missingTolerance {
// ignore error since it could be cleaned up next time
removedKeys = append(removedKeys, infoKey)
err = gc.option.cli.Remove(ctx, infoKey)
if err != nil {
missing++
log.Error("failed to remove object",
zap.String("infoKey", infoKey),
zap.Error(err))
}
}
}
for _, task := range scanTasks {
gc.recycleUnusedBinLogWithChecker(ctx, task.prefix, task.label, task.checker)
}
metrics.GarbageCollectorRunCount.WithLabelValues(fmt.Sprint(paramtable.GetNodeID())).Add(1)
log.Info("scan file to do garbage collection",
}

// recycleUnusedBinLogWithChecker scans the prefix and checks the path with checker.
// GC the file if checker returns false.
func (gc *garbageCollector) recycleUnusedBinLogWithChecker(ctx context.Context, prefix string, label string, checker func(objectInfo *storage.ChunkObjectInfo, segment *SegmentInfo) bool) {
logger := log.With(zap.String("prefix", prefix))
logger.Info("garbageCollector recycleUnusedBinlogFiles start", zap.String("prefix", prefix))
lastFilePath := ""
total := 0
valid := 0
unexpectedFailure := atomic.NewInt32(0)
removed := atomic.NewInt32(0)
start := time.Now()

futures := make([]*conc.Future[struct{}], 0)
err := gc.option.cli.WalkWithPrefix(ctx, prefix, true, func(chunkInfo *storage.ChunkObjectInfo) bool {
total++
lastFilePath = chunkInfo.FilePath

// Check file tolerance first to avoid unnecessary operation.
if time.Since(chunkInfo.ModifyTime) <= gc.option.missingTolerance {
logger.Info("garbageCollector recycleUnusedBinlogFiles skip file since it is not expired", zap.String("filePath", chunkInfo.FilePath), zap.Time("modifyTime", chunkInfo.ModifyTime))
return true
}

// Parse segmentID from file path.
// TODO: Does all files in the same segment have the same segmentID?
segmentID, err := storage.ParseSegmentIDByBinlog(gc.option.cli.RootPath(), chunkInfo.FilePath)
if err != nil {
unexpectedFailure.Inc()
logger.Warn("garbageCollector recycleUnusedBinlogFiles parse segment id error",
zap.String("filePath", chunkInfo.FilePath),
zap.Error(err))
return true
}

segment := gc.meta.GetSegment(segmentID)
if checker(chunkInfo, segment) {
valid++
logger.Info("garbageCollector recycleUnusedBinlogFiles skip file since it is valid", zap.String("filePath", chunkInfo.FilePath), zap.Int64("segmentID", segmentID))
return true
}

// ignore error since it could be cleaned up next time
file := chunkInfo.FilePath
future := gc.option.removeObjectPool.Submit(func() (struct{}, error) {
logger := logger.With(zap.String("file", file))
logger.Info("garbageCollector recycleUnusedBinlogFiles remove file...")

if err = gc.option.cli.Remove(ctx, file); err != nil {
log.Warn("garbageCollector recycleUnusedBinlogFiles remove file failed", zap.Error(err))
unexpectedFailure.Inc()
return struct{}{}, err
}
log.Info("garbageCollector recycleUnusedBinlogFiles remove file success")
removed.Inc()
return struct{}{}, nil
})
futures = append(futures, future)
return true
})
// Wait for all remove tasks done.
if err := conc.BlockOnAll(futures...); err != nil {
// error is logged, and can be ignored here.
logger.Warn("some task failure in remove object pool", zap.Error(err))
}

cost := time.Since(start)
logger.Info("garbageCollector recycleUnusedBinlogFiles done",
zap.Int("total", total),
zap.Int("valid", valid),
zap.Int("missing", missing),
zap.Strings("removedKeys", removedKeys))
zap.Int("unexpectedFailure", int(unexpectedFailure.Load())),
zap.Int("removed", int(removed.Load())),
zap.String("lastFilePath", lastFilePath),
zap.Duration("cost", cost),
zap.Error(err))

metrics.GarbageCollectorFileScanDuration.
WithLabelValues(fmt.Sprint(paramtable.GetNodeID()), label).
Observe(float64(cost.Milliseconds()))
}

func (gc *garbageCollector) checkDroppedSegmentGC(segment *SegmentInfo,
@@ -330,7 +404,13 @@ func (gc *garbageCollector) checkDroppedSegmentGC(segment *SegmentInfo,
return true
}

func (gc *garbageCollector) clearEtcd() {
// recycleDroppedSegments scans all segments and remove those dropped segments from meta and oss.
func (gc *garbageCollector) recycleDroppedSegments(ctx context.Context) {
start := time.Now()
log := log.With(zap.String("gcName", "recycleDroppedSegments"), zap.Time("startAt", start))
log.Info("start clear dropped segments...")
defer func() { log.Info("clear dropped segments done", zap.Duration("timeCost", time.Since(start))) }()

all := gc.meta.SelectSegments(func(si *SegmentInfo) bool { return true })
drops := make(map[int64]*SegmentInfo, 0)
compactTo := make(map[int64]*SegmentInfo)
@@ -367,42 +447,40 @@ func (gc *garbageCollector) clearEtcd() {
channelCPs[channel] = pos.GetTimestamp()
}

dropIDs := lo.Keys(drops)
sort.Slice(dropIDs, func(i, j int) bool {
return dropIDs[i] < dropIDs[j]
})

log.Info("start to GC segments", zap.Int("drop_num", len(dropIDs)))
for _, segmentID := range dropIDs {
segment, ok := drops[segmentID]
if !ok {
log.Warn("segmentID is not in drops", zap.Int64("segmentID", segmentID))
continue
log.Info("start to GC segments", zap.Int("drop_num", len(drops)))
for segmentID, segment := range drops {
if ctx.Err() != nil {
// process canceled, stop.
return
}

log := log.With(zap.Int64("segmentID", segmentID))
segInsertChannel := segment.GetInsertChannel()
if !gc.checkDroppedSegmentGC(segment, compactTo[segment.GetID()], indexedSet, channelCPs[segInsertChannel]) {
continue
}

logs := getLogs(segment)
log.Info("GC segment", zap.Int64("segmentID", segment.GetID()),
zap.Int("insert_logs", len(segment.GetBinlogs())),
log.Info("GC segment start...", zap.Int("insert_logs", len(segment.GetBinlogs())),
zap.Int("delta_logs", len(segment.GetDeltalogs())),
zap.Int("stats_logs", len(segment.GetStatslogs())))
if gc.removeLogs(logs) {
err := gc.meta.DropSegment(segment.GetID())
if err != nil {
log.Info("GC segment meta failed to drop segment", zap.Int64("segment id", segment.GetID()), zap.Error(err))
} else {
log.Info("GC segment meta drop semgent", zap.Int64("segment id", segment.GetID()))
if err := gc.removeObjectFiles(ctx, logs); err != nil {
log.Warn("GC segment remove logs failed", zap.Error(err))
continue
}

if err := gc.meta.DropSegment(segment.GetID()); err != nil {
log.Warn("GC segment meta failed to drop segment", zap.Error(err))
continue
}
log.Info("GC segment meta drop segment done")

if segList := gc.meta.GetSegmentsByChannel(segInsertChannel); len(segList) == 0 &&
!gc.meta.catalog.ChannelExists(context.Background(), segInsertChannel) {
log.Info("empty channel found during gc, manually cleanup channel checkpoints", zap.String("vChannel", segInsertChannel))
// TODO: remove channel checkpoint may be lost, need to be handled before segment GC?
if err := gc.meta.DropChannelCheckpoint(segInsertChannel); err != nil {
log.Info("failed to drop channel check point during segment garbage collection", zap.String("vchannel", segInsertChannel), zap.Error(err))
log.Warn("failed to drop channel check point during segment garbage collection", zap.String("vchannel", segInsertChannel), zap.Error(err))
}
}
}
@@ -413,156 +491,210 @@ func (gc *garbageCollector) isExpire(dropts Timestamp) bool {
return time.Since(droptime) > gc.option.dropTolerance
}

func getLogs(sinfo *SegmentInfo) []*datapb.Binlog {
var logs []*datapb.Binlog
func getLogs(sinfo *SegmentInfo) map[string]struct{} {
logs := make(map[string]struct{})
for _, flog := range sinfo.GetBinlogs() {
logs = append(logs, flog.GetBinlogs()...)
for _, l := range flog.GetBinlogs() {
logs[l.GetLogPath()] = struct{}{}
}
}

for _, flog := range sinfo.GetStatslogs() {
logs = append(logs, flog.GetBinlogs()...)
for _, l := range flog.GetBinlogs() {
logs[l.GetLogPath()] = struct{}{}
}
}

for _, flog := range sinfo.GetDeltalogs() {
logs = append(logs, flog.GetBinlogs()...)
for _, l := range flog.GetBinlogs() {
logs[l.GetLogPath()] = struct{}{}
}
}
return logs
}

func (gc *garbageCollector) removeLogs(logs []*datapb.Binlog) bool {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
var w sync.WaitGroup
w.Add(len(logs))
for _, l := range logs {
tmpLog := l
gc.option.removeLogPool.Submit(func() (struct{}, error) {
defer w.Done()
select {
case <-ctx.Done():
return struct{}{}, nil
default:
err := gc.option.cli.Remove(ctx, tmpLog.GetLogPath())
// removeObjectFiles remove file from oss storage, return error if any log failed to remove.
func (gc *garbageCollector) removeObjectFiles(ctx context.Context, filePaths map[string]struct{}) error {
futures := make([]*conc.Future[struct{}], 0)
for filePath := range filePaths {
filePath := filePath
future := gc.option.removeObjectPool.Submit(func() (struct{}, error) {
err := gc.option.cli.Remove(ctx, filePath)
// ignore the error Key Not Found
if err != nil {
switch err.(type) {
case minio.ErrorResponse:
errResp := minio.ToErrorResponse(err)
if errResp.Code != "" && errResp.Code != "NoSuchKey" {
cancel()
}
default:
cancel()
if !errors.Is(err, merr.ErrIoKeyNotFound) {
return struct{}{}, err
}
log.Info("remove log failed, key not found, may be removed at previous GC, ignore the error",
zap.String("path", filePath),
zap.Error(err))
}
return struct{}{}, nil
}
})
futures = append(futures, future)
}
w.Wait()
select {
case <-ctx.Done():
return false
default:
return true
}
return conc.BlockOnAll(futures...)
}

func (gc *garbageCollector) recycleUnusedIndexes() {
log.Info("start recycleUnusedIndexes")
// recycleUnusedIndexes is used to delete those indexes that is deleted by collection.
func (gc *garbageCollector) recycleUnusedIndexes(ctx context.Context) {
start := time.Now()
log := log.With(zap.String("gcName", "recycleUnusedIndexes"), zap.Time("startAt", start))
log.Info("start recycleUnusedIndexes...")
defer func() { log.Info("recycleUnusedIndexes done", zap.Duration("timeCost", time.Since(start))) }()

deletedIndexes := gc.meta.indexMeta.GetDeletedIndexes()
for _, index := range deletedIndexes {
if ctx.Err() != nil {
// process canceled.
return
}

log := log.With(zap.Int64("collectionID", index.CollectionID), zap.Int64("fieldID", index.FieldID), zap.Int64("indexID", index.IndexID))
if err := gc.meta.indexMeta.RemoveIndex(index.CollectionID, index.IndexID); err != nil {
log.Warn("remove index on collection fail", zap.Int64("collectionID", index.CollectionID),
zap.Int64("indexID", index.IndexID), zap.Error(err))
log.Warn("remove index on collection fail", zap.Error(err))
continue
}
log.Info("remove index on collection done")
}
}

func (gc *garbageCollector) recycleUnusedSegIndexes() {
// recycleUnusedSegIndexes remove the index of segment if index is deleted or segment itself is deleted.
func (gc *garbageCollector) recycleUnusedSegIndexes(ctx context.Context) {
start := time.Now()
log := log.With(zap.String("gcName", "recycleUnusedSegIndexes"), zap.Time("startAt", start))
log.Info("start recycleUnusedSegIndexes...")
defer func() { log.Info("recycleUnusedSegIndexes done", zap.Duration("timeCost", time.Since(start))) }()

segIndexes := gc.meta.indexMeta.GetAllSegIndexes()
for _, segIdx := range segIndexes {
if ctx.Err() != nil {
// process canceled.
return
}

// 1. segment belongs to is deleted.
// 2. index is deleted.
if gc.meta.GetSegment(segIdx.SegmentID) == nil || !gc.meta.indexMeta.IsIndexExist(segIdx.CollectionID, segIdx.IndexID) {
if err := gc.meta.indexMeta.RemoveSegmentIndex(segIdx.CollectionID, segIdx.PartitionID, segIdx.SegmentID, segIdx.IndexID, segIdx.BuildID); err != nil {
log.Warn("delete index meta from etcd failed, wait to retry", zap.Int64("buildID", segIdx.BuildID),
zap.Int64("segmentID", segIdx.SegmentID), zap.Int64("nodeID", segIdx.NodeID), zap.Error(err))
indexFiles := gc.getAllIndexFilesOfIndex(segIdx)
log := log.With(zap.Int64("collectionID", segIdx.CollectionID),
zap.Int64("partitionID", segIdx.PartitionID),
zap.Int64("segmentID", segIdx.SegmentID),
zap.Int64("indexID", segIdx.IndexID),
zap.Int64("buildID", segIdx.BuildID),
zap.Int64("nodeID", segIdx.NodeID),
zap.Int("indexFiles", len(indexFiles)))
log.Info("GC Segment Index file start...")

// Remove index files first.
if err := gc.removeObjectFiles(ctx, indexFiles); err != nil {
log.Warn("fail to remove index files for index", zap.Error(err))
continue
}
log.Info("index meta recycle success", zap.Int64("buildID", segIdx.BuildID),
zap.Int64("segmentID", segIdx.SegmentID))

// Remove meta from index meta.
if err := gc.meta.indexMeta.RemoveSegmentIndex(segIdx.CollectionID, segIdx.PartitionID, segIdx.SegmentID, segIdx.IndexID, segIdx.BuildID); err != nil {
log.Warn("delete index meta from etcd failed, wait to retry", zap.Error(err))
continue
}
log.Info("index meta recycle success")
}
}
}

// recycleUnusedIndexFiles is used to delete those index files that no longer exist in the meta.
func (gc *garbageCollector) recycleUnusedIndexFiles() {
log.Info("start recycleUnusedIndexFiles")
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
startTs := time.Now()
func (gc *garbageCollector) recycleUnusedIndexFiles(ctx context.Context) {
start := time.Now()
log := log.With(zap.String("gcName", "recycleUnusedIndexFiles"), zap.Time("startAt", start))
log.Info("start recycleUnusedIndexFiles...")

prefix := path.Join(gc.option.cli.RootPath(), common.SegmentIndexPath) + "/"
// list dir first
keys, _, err := gc.option.cli.ListWithPrefix(ctx, prefix, false)
if err != nil {
log.Warn("garbageCollector recycleUnusedIndexFiles list keys from chunk manager failed", zap.Error(err))
return
}
log.Info("recycleUnusedIndexFiles, finish list object", zap.Duration("time spent", time.Since(startTs)), zap.Int("build ids", len(keys)))
for _, key := range keys {
log.Debug("indexFiles keys", zap.String("key", key))
keyCount := 0
err := gc.option.cli.WalkWithPrefix(ctx, prefix, false, func(indexPathInfo *storage.ChunkObjectInfo) bool {
key := indexPathInfo.FilePath
keyCount++
logger := log.With(zap.String("prefix", prefix), zap.String("key", key))

buildID, err := parseBuildIDFromFilePath(key)
if err != nil {
log.Warn("garbageCollector recycleUnusedIndexFiles parseIndexFileKey", zap.String("key", key), zap.Error(err))
continue
logger.Warn("garbageCollector recycleUnusedIndexFiles parseIndexFileKey", zap.Error(err))
return true
}
log.Info("garbageCollector will recycle index files", zap.Int64("buildID", buildID))
logger = logger.With(zap.Int64("buildID", buildID))
logger.Info("garbageCollector will recycle index files")
canRecycle, segIdx := gc.meta.indexMeta.CleanSegmentIndex(buildID)
if !canRecycle {
// Even if the index is marked as deleted, the index file will not be recycled, wait for the next gc,
// and delete all index files about the buildID at one time.
log.Info("garbageCollector can not recycle index files", zap.Int64("buildID", buildID))
continue
logger.Info("garbageCollector can not recycle index files")
return true
}
if segIdx == nil {
// buildID no longer exists in meta, remove all index files
log.Info("garbageCollector recycleUnusedIndexFiles find meta has not exist, remove index files",
zap.Int64("buildID", buildID))
logger.Info("garbageCollector recycleUnusedIndexFiles find meta has not exist, remove index files")
err = gc.option.cli.RemoveWithPrefix(ctx, key)
if err != nil {
log.Warn("garbageCollector recycleUnusedIndexFiles remove index files failed",
zap.Int64("buildID", buildID), zap.String("prefix", key), zap.Error(err))
continue
logger.Warn("garbageCollector recycleUnusedIndexFiles remove index files failed", zap.Error(err))
return true
}
log.Info("garbageCollector recycleUnusedIndexFiles remove index files success",
zap.Int64("buildID", buildID), zap.String("prefix", key))
continue
logger.Info("garbageCollector recycleUnusedIndexFiles remove index files success")
return true
}
filesMap := gc.getAllIndexFilesOfIndex(segIdx)

logger.Info("recycle index files", zap.Int("meta files num", len(filesMap)))
deletedFilesNum := atomic.NewInt32(0)
fileNum := 0

futures := make([]*conc.Future[struct{}], 0)
err = gc.option.cli.WalkWithPrefix(ctx, key, true, func(indexFile *storage.ChunkObjectInfo) bool {
fileNum++
file := indexFile.FilePath
if _, ok := filesMap[file]; !ok {
future := gc.option.removeObjectPool.Submit(func() (struct{}, error) {
logger := logger.With(zap.String("file", file))
logger.Info("garbageCollector recycleUnusedIndexFiles remove file...")

if err := gc.option.cli.Remove(ctx, file); err != nil {
logger.Warn("garbageCollector recycleUnusedIndexFiles remove file failed", zap.Error(err))
return struct{}{}, err
}
deletedFilesNum.Inc()
logger.Info("garbageCollector recycleUnusedIndexFiles remove file success")
return struct{}{}, nil
})
futures = append(futures, future)
}
return true
})
// Wait for all remove tasks done.
if err := conc.BlockOnAll(futures...); err != nil {
// error is logged, and can be ignored here.
logger.Warn("some task failure in remove object pool", zap.Error(err))
}

logger = logger.With(zap.Int("deleteIndexFilesNum", int(deletedFilesNum.Load())), zap.Int("walkFileNum", fileNum))
if err != nil {
logger.Warn("index files recycle failed when walk with prefix", zap.Error(err))
return true
}
logger.Info("index files recycle done")
return true
})
log = log.With(zap.Duration("timeCost", time.Since(start)), zap.Int("keyCount", keyCount), zap.Error(err))
if err != nil {
log.Warn("garbageCollector recycleUnusedIndexFiles failed", zap.Error(err))
return
}
log.Info("recycleUnusedIndexFiles done")
}

// getAllIndexFilesOfIndex returns the all index files of index.
func (gc *garbageCollector) getAllIndexFilesOfIndex(segmentIndex *model.SegmentIndex) map[string]struct{} {
filesMap := make(map[string]struct{})
for _, fileID := range segIdx.IndexFileKeys {
filepath := metautil.BuildSegmentIndexFilePath(gc.option.cli.RootPath(), segIdx.BuildID, segIdx.IndexVersion,
segIdx.PartitionID, segIdx.SegmentID, fileID)
for _, fileID := range segmentIndex.IndexFileKeys {
filepath := metautil.BuildSegmentIndexFilePath(gc.option.cli.RootPath(), segmentIndex.BuildID, segmentIndex.IndexVersion,
segmentIndex.PartitionID, segmentIndex.SegmentID, fileID)
filesMap[filepath] = struct{}{}
}
files, _, err := gc.option.cli.ListWithPrefix(ctx, key, true)
if err != nil {
log.Warn("garbageCollector recycleUnusedIndexFiles list files failed",
zap.Int64("buildID", buildID), zap.String("prefix", key), zap.Error(err))
continue
}
log.Info("recycle index files", zap.Int64("buildID", buildID), zap.Int("meta files num", len(filesMap)),
zap.Int("chunkManager files num", len(files)))
deletedFilesNum := 0
for _, file := range files {
if _, ok := filesMap[file]; !ok {
if err = gc.option.cli.Remove(ctx, file); err != nil {
log.Warn("garbageCollector recycleUnusedIndexFiles remove file failed",
zap.Int64("buildID", buildID), zap.String("file", file), zap.Error(err))
continue
}
deletedFilesNum++
}
}
log.Info("index files recycle success", zap.Int64("buildID", buildID),
zap.Int("delete index files num", deletedFilesNum))
}
return filesMap
}
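The new GC paths above all follow the same shape: submit each Remove call to a bounded worker pool, collect the futures, then block until every task has finished and surface any failure. The standard-library sketch below approximates that pattern without the milvus conc package; removeAll and its parameters are illustrative names, not part of the real code.

// Rough standard-library equivalent of the submit-then-BlockOnAll pattern used
// by removeObjectFiles; a sketch, not the actual implementation.
package main

import (
	"context"
	"errors"
	"fmt"
	"sync"
)

func removeAll(ctx context.Context, paths map[string]struct{}, remove func(context.Context, string) error) error {
	var (
		wg       sync.WaitGroup
		mu       sync.Mutex
		firstErr error
	)
	sem := make(chan struct{}, 32) // bounded concurrency, analogous to gc.removeConcurrent
	for p := range paths {
		p := p
		wg.Add(1)
		sem <- struct{}{}
		go func() {
			defer wg.Done()
			defer func() { <-sem }()
			if err := remove(ctx, p); err != nil {
				mu.Lock()
				if firstErr == nil {
					firstErr = err
				}
				mu.Unlock()
			}
		}()
	}
	wg.Wait()
	return firstErr // nil only if every removal succeeded
}

func main() {
	paths := map[string]struct{}{"files/a": {}, "files/b": {}}
	err := removeAll(context.TODO(), paths, func(_ context.Context, p string) error {
		fmt.Println("remove", p)
		if p == "files/b" {
			return errors.New("simulated failure")
		}
		return nil
	})
	fmt.Println("result:", err)
}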
@@ -51,6 +51,7 @@ import (
"github.com/milvus-io/milvus/internal/storage"
"github.com/milvus-io/milvus/pkg/common"
"github.com/milvus-io/milvus/pkg/util/funcutil"
"github.com/milvus-io/milvus/pkg/util/merr"
"github.com/milvus-io/milvus/pkg/util/paramtable"
)

@@ -100,7 +101,8 @@ func Test_garbageCollector_basic(t *testing.T) {
})
}

func validateMinioPrefixElements(t *testing.T, cli *minio.Client, bucketName string, prefix string, elements []string) {
func validateMinioPrefixElements(t *testing.T, manager *storage.RemoteChunkManager, bucketName string, prefix string, elements []string) {
cli := manager.UnderlyingObjectStorage().(*storage.MinioObjectStorage).Client
var current []string
for info := range cli.ListObjects(context.TODO(), bucketName, minio.ListObjectsOptions{Prefix: prefix, Recursive: true}) {
current = append(current, info.Key)
@@ -127,12 +129,12 @@ func Test_garbageCollector_scan(t *testing.T) {
missingTolerance: time.Hour * 24,
dropTolerance: time.Hour * 24,
})
gc.scan()
gc.recycleUnusedBinlogFiles(context.TODO())

validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, common.SegmentInsertLogPath), inserts)
validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, common.SegmentStatslogPath), stats)
validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, common.SegmentDeltaLogPath), delta)
validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, `indexes`), others)
validateMinioPrefixElements(t, cli, bucketName, path.Join(rootPath, common.SegmentInsertLogPath), inserts)
validateMinioPrefixElements(t, cli, bucketName, path.Join(rootPath, common.SegmentStatslogPath), stats)
validateMinioPrefixElements(t, cli, bucketName, path.Join(rootPath, common.SegmentDeltaLogPath), delta)
validateMinioPrefixElements(t, cli, bucketName, path.Join(rootPath, `indexes`), others)
gc.close()
})

@@ -145,12 +147,12 @@ func Test_garbageCollector_scan(t *testing.T) {
missingTolerance: time.Hour * 24,
dropTolerance: time.Hour * 24,
})
gc.scan()
gc.recycleUnusedBinlogFiles(context.TODO())

validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, common.SegmentInsertLogPath), inserts)
validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, common.SegmentStatslogPath), stats)
validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, common.SegmentDeltaLogPath), delta)
validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, `indexes`), others)
validateMinioPrefixElements(t, cli, bucketName, path.Join(rootPath, common.SegmentInsertLogPath), inserts)
validateMinioPrefixElements(t, cli, bucketName, path.Join(rootPath, common.SegmentStatslogPath), stats)
validateMinioPrefixElements(t, cli, bucketName, path.Join(rootPath, common.SegmentDeltaLogPath), delta)
validateMinioPrefixElements(t, cli, bucketName, path.Join(rootPath, `indexes`), others)

gc.close()
})
@@ -172,11 +174,11 @@ func Test_garbageCollector_scan(t *testing.T) {
dropTolerance: time.Hour * 24,
})
gc.start()
gc.scan()
validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, common.SegmentInsertLogPath), inserts)
validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, common.SegmentStatslogPath), stats)
validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, common.SegmentDeltaLogPath), delta)
validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, `indexes`), others)
gc.recycleUnusedBinlogFiles(context.TODO())
validateMinioPrefixElements(t, cli, bucketName, path.Join(rootPath, common.SegmentInsertLogPath), inserts)
validateMinioPrefixElements(t, cli, bucketName, path.Join(rootPath, common.SegmentStatslogPath), stats)
validateMinioPrefixElements(t, cli, bucketName, path.Join(rootPath, common.SegmentDeltaLogPath), delta)
validateMinioPrefixElements(t, cli, bucketName, path.Join(rootPath, `indexes`), others)

gc.close()
})
@@ -200,11 +202,11 @@ func Test_garbageCollector_scan(t *testing.T) {
missingTolerance: time.Hour * 24,
dropTolerance: 0,
})
gc.clearEtcd()
validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, common.SegmentInsertLogPath), inserts[1:])
validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, common.SegmentStatslogPath), stats[1:])
validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, common.SegmentDeltaLogPath), delta[1:])
validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, `indexes`), others)
gc.recycleDroppedSegments(context.TODO())
validateMinioPrefixElements(t, cli, bucketName, path.Join(rootPath, common.SegmentInsertLogPath), inserts[1:])
validateMinioPrefixElements(t, cli, bucketName, path.Join(rootPath, common.SegmentStatslogPath), stats[1:])
validateMinioPrefixElements(t, cli, bucketName, path.Join(rootPath, common.SegmentDeltaLogPath), delta[1:])
validateMinioPrefixElements(t, cli, bucketName, path.Join(rootPath, `indexes`), others)

gc.close()
})
@@ -218,14 +220,14 @@ func Test_garbageCollector_scan(t *testing.T) {
dropTolerance: 0,
})
gc.start()
gc.scan()
gc.clearEtcd()
gc.recycleUnusedBinlogFiles(context.TODO())
gc.recycleDroppedSegments(context.TODO())

// bad path shall remains since datacoord cannot determine file is garbage or not if path is not valid
validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, common.SegmentInsertLogPath), inserts[1:2])
validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, common.SegmentStatslogPath), stats[1:2])
validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, common.SegmentDeltaLogPath), delta[1:2])
validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, `indexes`), others)
validateMinioPrefixElements(t, cli, bucketName, path.Join(rootPath, common.SegmentInsertLogPath), inserts[1:2])
validateMinioPrefixElements(t, cli, bucketName, path.Join(rootPath, common.SegmentStatslogPath), stats[1:2])
validateMinioPrefixElements(t, cli, bucketName, path.Join(rootPath, common.SegmentDeltaLogPath), delta[1:2])
validateMinioPrefixElements(t, cli, bucketName, path.Join(rootPath, `indexes`), others)

gc.close()
})
@@ -240,22 +242,22 @@ func Test_garbageCollector_scan(t *testing.T) {
dropTolerance: 0,
})
gc.start()
gc.scan()
gc.recycleUnusedBinlogFiles(context.TODO())

// bad path shall remains since datacoord cannot determine file is garbage or not if path is not valid
validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, common.SegmentInsertLogPath), inserts[1:2])
validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, common.SegmentStatslogPath), stats[1:2])
validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, common.SegmentDeltaLogPath), delta[1:2])
validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, `indexes`), others)
validateMinioPrefixElements(t, cli, bucketName, path.Join(rootPath, common.SegmentInsertLogPath), inserts[1:2])
validateMinioPrefixElements(t, cli, bucketName, path.Join(rootPath, common.SegmentStatslogPath), stats[1:2])
validateMinioPrefixElements(t, cli, bucketName, path.Join(rootPath, common.SegmentDeltaLogPath), delta[1:2])
validateMinioPrefixElements(t, cli, bucketName, path.Join(rootPath, `indexes`), others)

gc.close()
})

cleanupOSS(cli.Client, bucketName, rootPath)
cleanupOSS(cli, bucketName, rootPath)
}

// initialize unit test sso env
func initUtOSSEnv(bucket, root string, n int) (mcm *storage.MinioChunkManager, inserts []string, stats []string, delta []string, other []string, err error) {
func initUtOSSEnv(bucket, root string, n int) (mcm *storage.RemoteChunkManager, inserts []string, stats []string, delta []string, other []string, err error) {
paramtable.Init()

if Params.MinioCfg.UseSSL.GetAsBool() && len(Params.MinioCfg.SslCACert.GetValue()) > 0 {
@@ -335,14 +337,16 @@ func initUtOSSEnv(bucket, root string, n int) (mcm *storage.MinioChunkManager, i
}
other = append(other, info.Key)
}
mcm = &storage.MinioChunkManager{
Client: cli,
}
mcm.SetVar(bucket, root)
mcm = storage.NewRemoteChunkManagerForTesting(
cli,
bucket,
root,
)
return mcm, inserts, stats, delta, other, nil
}

func cleanupOSS(cli *minio.Client, bucket, root string) {
func cleanupOSS(chunkManager *storage.RemoteChunkManager, bucket, root string) {
cli := chunkManager.UnderlyingObjectStorage().(*storage.MinioObjectStorage).Client
ch := cli.ListObjects(context.TODO(), bucket, minio.ListObjectsOptions{Prefix: root, Recursive: true})
cli.RemoveObjects(context.TODO(), bucket, ch, minio.RemoveObjectsOptions{})
cli.RemoveBucket(context.TODO(), bucket)
@@ -425,7 +429,7 @@ func TestGarbageCollector_recycleUnusedIndexes(t *testing.T) {
mock.Anything,
).Return(nil)
gc := newGarbageCollector(createMetaForRecycleUnusedIndexes(catalog), nil, GcOption{})
gc.recycleUnusedIndexes()
gc.recycleUnusedIndexes(context.TODO())
})

t.Run("fail", func(t *testing.T) {
@@ -436,7 +440,7 @@ func TestGarbageCollector_recycleUnusedIndexes(t *testing.T) {
mock.Anything,
).Return(errors.New("fail"))
gc := newGarbageCollector(createMetaForRecycleUnusedIndexes(catalog), nil, GcOption{})
gc.recycleUnusedIndexes()
gc.recycleUnusedIndexes(context.TODO())
})
}

@@ -558,6 +562,9 @@ func createMetaForRecycleUnusedSegIndexes(catalog metastore.DataCoordCatalog) *m

func TestGarbageCollector_recycleUnusedSegIndexes(t *testing.T) {
t.Run("success", func(t *testing.T) {
mockChunkManager := mocks.NewChunkManager(t)
mockChunkManager.EXPECT().RootPath().Return("root")
mockChunkManager.EXPECT().Remove(mock.Anything, mock.Anything).Return(nil)
catalog := catalogmocks.NewDataCoordCatalog(t)
catalog.On("DropSegmentIndex",
mock.Anything,
@@ -566,12 +573,17 @@ func TestGarbageCollector_recycleUnusedSegIndexes(t *testing.T) {
mock.Anything,
mock.Anything,
).Return(nil)
gc := newGarbageCollector(createMetaForRecycleUnusedSegIndexes(catalog), nil, GcOption{})
gc.recycleUnusedSegIndexes()
gc := newGarbageCollector(createMetaForRecycleUnusedSegIndexes(catalog), nil, GcOption{
cli: mockChunkManager,
})
gc.recycleUnusedSegIndexes(context.TODO())
})

t.Run("fail", func(t *testing.T) {
catalog := catalogmocks.NewDataCoordCatalog(t)
mockChunkManager := mocks.NewChunkManager(t)
mockChunkManager.EXPECT().RootPath().Return("root")
mockChunkManager.EXPECT().Remove(mock.Anything, mock.Anything).Return(nil)
catalog.On("DropSegmentIndex",
mock.Anything,
mock.Anything,
@@ -579,8 +591,10 @@ func TestGarbageCollector_recycleUnusedSegIndexes(t *testing.T) {
mock.Anything,
mock.Anything,
).Return(errors.New("fail"))
gc := newGarbageCollector(createMetaForRecycleUnusedSegIndexes(catalog), nil, GcOption{})
gc.recycleUnusedSegIndexes()
gc := newGarbageCollector(createMetaForRecycleUnusedSegIndexes(catalog), nil, GcOption{
cli: mockChunkManager,
})
gc.recycleUnusedSegIndexes(context.TODO())
})
}

@@ -726,7 +740,14 @@ func TestGarbageCollector_recycleUnusedIndexFiles(t *testing.T) {
t.Run("success", func(t *testing.T) {
cm := &mocks.ChunkManager{}
cm.EXPECT().RootPath().Return("root")
cm.EXPECT().ListWithPrefix(mock.Anything, mock.Anything, mock.Anything).Return([]string{"a/b/c/", "a/b/600/", "a/b/601/", "a/b/602/"}, nil, nil)
cm.EXPECT().WalkWithPrefix(mock.Anything, mock.Anything, mock.Anything, mock.Anything).RunAndReturn(
func(ctx context.Context, s string, b bool, cowf storage.ChunkObjectWalkFunc) error {
for _, file := range []string{"a/b/c/", "a/b/600/", "a/b/601/", "a/b/602/"} {
cowf(&storage.ChunkObjectInfo{FilePath: file})
}
return nil
})

cm.EXPECT().RemoveWithPrefix(mock.Anything, mock.Anything).Return(nil)
cm.EXPECT().Remove(mock.Anything, mock.Anything).Return(nil)
gc := newGarbageCollector(
@@ -736,27 +757,36 @@ func TestGarbageCollector_recycleUnusedIndexFiles(t *testing.T) {
cli: cm,
})

gc.recycleUnusedIndexFiles()
gc.recycleUnusedIndexFiles(context.TODO())
})

t.Run("list fail", func(t *testing.T) {
cm := &mocks.ChunkManager{}
cm.EXPECT().RootPath().Return("root")
cm.EXPECT().ListWithPrefix(mock.Anything, mock.Anything, mock.Anything).Return(nil, nil, errors.New("error"))
cm.EXPECT().WalkWithPrefix(mock.Anything, mock.Anything, mock.Anything, mock.Anything).RunAndReturn(
func(ctx context.Context, s string, b bool, cowf storage.ChunkObjectWalkFunc) error {
return errors.New("error")
})
gc := newGarbageCollector(
createMetaTableForRecycleUnusedIndexFiles(&datacoord.Catalog{MetaKv: kvmocks.NewMetaKv(t)}),
nil,
GcOption{
cli: cm,
})
gc.recycleUnusedIndexFiles()
gc.recycleUnusedIndexFiles(context.TODO())
})

t.Run("remove fail", func(t *testing.T) {
cm := &mocks.ChunkManager{}
cm.EXPECT().RootPath().Return("root")
cm.EXPECT().Remove(mock.Anything, mock.Anything).Return(errors.New("error"))
cm.EXPECT().ListWithPrefix(mock.Anything, mock.Anything, mock.Anything).Return([]string{"a/b/c/", "a/b/600/", "a/b/601/", "a/b/602/"}, nil, nil)
cm.EXPECT().WalkWithPrefix(mock.Anything, mock.Anything, mock.Anything, mock.Anything).RunAndReturn(
func(ctx context.Context, s string, b bool, cowf storage.ChunkObjectWalkFunc) error {
for _, file := range []string{"a/b/c/", "a/b/600/", "a/b/601/", "a/b/602/"} {
cowf(&storage.ChunkObjectInfo{FilePath: file})
}
return nil
})
cm.EXPECT().RemoveWithPrefix(mock.Anything, mock.Anything).Return(nil)
gc := newGarbageCollector(
createMetaTableForRecycleUnusedIndexFiles(&datacoord.Catalog{MetaKv: kvmocks.NewMetaKv(t)}),
@@ -764,14 +794,20 @@ func TestGarbageCollector_recycleUnusedIndexFiles(t *testing.T) {
GcOption{
cli: cm,
})
gc.recycleUnusedIndexFiles()
gc.recycleUnusedIndexFiles(context.TODO())
})

t.Run("remove with prefix fail", func(t *testing.T) {
cm := &mocks.ChunkManager{}
cm.EXPECT().RootPath().Return("root")
cm.EXPECT().Remove(mock.Anything, mock.Anything).Return(errors.New("error"))
cm.EXPECT().ListWithPrefix(mock.Anything, mock.Anything, mock.Anything).Return([]string{"a/b/c/", "a/b/600/", "a/b/601/", "a/b/602/"}, nil, nil)
cm.EXPECT().WalkWithPrefix(mock.Anything, mock.Anything, mock.Anything, mock.Anything).RunAndReturn(
func(ctx context.Context, s string, b bool, cowf storage.ChunkObjectWalkFunc) error {
for _, file := range []string{"a/b/c/", "a/b/600/", "a/b/601/", "a/b/602/"} {
cowf(&storage.ChunkObjectInfo{FilePath: file})
}
return nil
})
cm.EXPECT().RemoveWithPrefix(mock.Anything, mock.Anything).Return(errors.New("error"))
gc := newGarbageCollector(
createMetaTableForRecycleUnusedIndexFiles(&datacoord.Catalog{MetaKv: kvmocks.NewMetaKv(t)}),
@@ -779,7 +815,7 @@ func TestGarbageCollector_recycleUnusedIndexFiles(t *testing.T) {
GcOption{
cli: cm,
})
gc.recycleUnusedIndexFiles()
gc.recycleUnusedIndexFiles(context.TODO())
})
}

@@ -1320,7 +1356,7 @@ func TestGarbageCollector_clearETCD(t *testing.T) {
cli: cm,
dropTolerance: 1,
})
gc.clearEtcd()
gc.recycleDroppedSegments(context.TODO())

/*
A B
@@ -1376,7 +1412,7 @@ func TestGarbageCollector_clearETCD(t *testing.T) {
})
assert.NoError(t, err)

gc.clearEtcd()
gc.recycleDroppedSegments(context.TODO())
/*

A: processed prior to C, C is not GCed yet and C is not indexed, A is not GCed in this turn
@@ -1392,7 +1428,7 @@ func TestGarbageCollector_clearETCD(t *testing.T) {
segD = gc.meta.GetSegment(segID + 3)
assert.Nil(t, segD)

gc.clearEtcd()
gc.recycleDroppedSegments(context.TODO())
/*
A: compacted became false due to C is GCed already, A should be GCed since dropTolernace is meet
B: compacted became false due to C is GCed already, B should be GCed since dropTolerance is meet
@@ -1403,9 +1439,9 @@ func TestGarbageCollector_clearETCD(t *testing.T) {
assert.Nil(t, segB)
}

func TestGarbageCollector_removelogs(t *testing.T) {
func TestGarbageCollector_removeObjectPool(t *testing.T) {
paramtable.Init()
cm := &mocks.ChunkManager{}
cm := mocks.NewChunkManager(t)
gc := newGarbageCollector(
nil,
nil,
@@ -1413,43 +1449,37 @@ func TestGarbageCollector_removelogs(t *testing.T) {
cli: cm,
dropTolerance: 1,
})
var logs []*datapb.Binlog
logs := make(map[string]struct{})
for i := 0; i < 50; i++ {
logs = append(logs, &datapb.Binlog{
LogPath: "log" + strconv.Itoa(i),
})
logs[fmt.Sprintf("log%d", i)] = struct{}{}
}

t.Run("success", func(t *testing.T) {
call := cm.EXPECT().Remove(mock.Anything, mock.Anything).Return(nil)
defer call.Unset()
b := gc.removeLogs(logs)
assert.True(t, b)
b := gc.removeObjectFiles(context.TODO(), logs)
assert.NoError(t, b)
})

t.Run("minio not found error", func(t *testing.T) {
call := cm.EXPECT().Remove(mock.Anything, mock.Anything).Return(minio.ErrorResponse{
Code: "NoSuchKey",
})
t.Run("oss not found error", func(t *testing.T) {
call := cm.EXPECT().Remove(mock.Anything, mock.Anything).Return(merr.WrapErrIoKeyNotFound("not found"))
defer call.Unset()
b := gc.removeLogs(logs)
assert.True(t, b)
b := gc.removeObjectFiles(context.TODO(), logs)
assert.NoError(t, b)
})

t.Run("minio server error", func(t *testing.T) {
call := cm.EXPECT().Remove(mock.Anything, mock.Anything).Return(minio.ErrorResponse{
Code: "Server Error",
})
t.Run("oss server error", func(t *testing.T) {
call := cm.EXPECT().Remove(mock.Anything, mock.Anything).Return(merr.WrapErrIoFailed("server error", errors.New("err")))
defer call.Unset()
b := gc.removeLogs(logs)
assert.False(t, b)
b := gc.removeObjectFiles(context.TODO(), logs)
assert.Error(t, b)
})

t.Run("other type error", func(t *testing.T) {
call := cm.EXPECT().Remove(mock.Anything, mock.Anything).Return(errors.New("other error"))
defer call.Unset()
b := gc.removeLogs(logs)
assert.False(t, b)
b := gc.removeObjectFiles(context.TODO(), logs)
assert.Error(t, b)
})
}

@@ -1459,7 +1489,7 @@ type GarbageCollectorSuite struct {
bucketName string
rootPath string

cli *storage.MinioChunkManager
cli *storage.RemoteChunkManager
inserts []string
stats []string
delta []string
@@ -1481,7 +1511,7 @@ func (s *GarbageCollectorSuite) SetupTest() {
}

func (s *GarbageCollectorSuite) TearDownTest() {
cleanupOSS(s.cli.Client, s.bucketName, s.rootPath)
cleanupOSS(s.cli, s.bucketName, s.rootPath)
}

func (s *GarbageCollectorSuite) TestPauseResume() {
@@ -438,7 +438,7 @@ func ListBinlogsAndGroupBySegment(ctx context.Context, cm storage.ChunkManager,
}

insertPrefix := importFile.GetPaths()[0]
segmentInsertPaths, _, err := cm.ListWithPrefix(ctx, insertPrefix, false)
segmentInsertPaths, _, err := storage.ListAllChunkWithPrefix(ctx, cm, insertPrefix, false)
if err != nil {
return nil, err
}
@@ -450,7 +450,7 @@ func ListBinlogsAndGroupBySegment(ctx context.Context, cm storage.ChunkManager,
return segmentImportFiles, nil
}
deltaPrefix := importFile.GetPaths()[1]
segmentDeltaPaths, _, err := cm.ListWithPrefix(context.Background(), deltaPrefix, false)
segmentDeltaPaths, _, err := storage.ListAllChunkWithPrefix(ctx, cm, deltaPrefix, false)
if err != nil {
return nil, err
}
@@ -34,6 +34,7 @@ import (
mocks2 "github.com/milvus-io/milvus/internal/mocks"
"github.com/milvus-io/milvus/internal/proto/datapb"
"github.com/milvus-io/milvus/internal/proto/internalpb"
"github.com/milvus-io/milvus/internal/storage"
"github.com/milvus-io/milvus/pkg/util/merr"
"github.com/milvus-io/milvus/pkg/util/paramtable"
)
@@ -336,8 +337,24 @@ func TestImportUtil_ListBinlogsAndGroupBySegment(t *testing.T) {
}

cm := mocks2.NewChunkManager(t)
cm.EXPECT().ListWithPrefix(mock.Anything, insertPrefix, mock.Anything).Return(segmentInsertPaths, nil, nil)
cm.EXPECT().ListWithPrefix(mock.Anything, deltaPrefix, mock.Anything).Return(segmentDeltaPaths, nil, nil)
cm.EXPECT().WalkWithPrefix(mock.Anything, insertPrefix, mock.Anything, mock.Anything).RunAndReturn(
func(ctx context.Context, s string, b bool, cowf storage.ChunkObjectWalkFunc) error {
for _, p := range segmentInsertPaths {
if !cowf(&storage.ChunkObjectInfo{FilePath: p}) {
return nil
}
}
return nil
})
cm.EXPECT().WalkWithPrefix(mock.Anything, deltaPrefix, mock.Anything, mock.Anything).RunAndReturn(
func(ctx context.Context, s string, b bool, cowf storage.ChunkObjectWalkFunc) error {
for _, p := range segmentDeltaPaths {
if !cowf(&storage.ChunkObjectInfo{FilePath: p}) {
return nil
}
}
return nil
})

file := &internalpb.ImportFile{
Id: 1,
@ -216,6 +216,28 @@ func (s *SegmentsInfo) SetIsCompacting(segmentID UniqueID, isCompacting bool) {
}
}

func (s *SegmentInfo) IsDeltaLogExists(logID int64) bool {
for _, deltaLogs := range s.GetDeltalogs() {
for _, l := range deltaLogs.GetBinlogs() {
if l.GetLogID() == logID {
return true
}
}
}
return false
}

func (s *SegmentInfo) IsStatsLogExists(logID int64) bool {
for _, statsLogs := range s.GetStatslogs() {
for _, l := range statsLogs.GetBinlogs() {
if l.GetLogID() == logID {
return true
}
}
}
return false
}

// Clone deep clone the segment info and return a new instance
func (s *SegmentInfo) Clone(opts ...SegmentInfoOption) *SegmentInfo {
info := proto.Clone(s.SegmentInfo).(*datapb.SegmentInfo)

@ -96,3 +96,49 @@ func TestCompactionTo(t *testing.T) {
assert.True(t, ok)
assert.Nil(t, s)
}

func TestIsDeltaLogExists(t *testing.T) {
segment := &SegmentInfo{
SegmentInfo: &datapb.SegmentInfo{
Deltalogs: []*datapb.FieldBinlog{
{
Binlogs: []*datapb.Binlog{
{
LogID: 1,
},
{
LogID: 2,
},
},
},
},
},
}
assert.True(t, segment.IsDeltaLogExists(1))
assert.True(t, segment.IsDeltaLogExists(2))
assert.False(t, segment.IsDeltaLogExists(3))
assert.False(t, segment.IsDeltaLogExists(0))
}

func TestIsStatsLogExists(t *testing.T) {
segment := &SegmentInfo{
SegmentInfo: &datapb.SegmentInfo{
Statslogs: []*datapb.FieldBinlog{
{
Binlogs: []*datapb.Binlog{
{
LogID: 1,
},
{
LogID: 2,
},
},
},
},
},
}
assert.True(t, segment.IsStatsLogExists(1))
assert.True(t, segment.IsStatsLogExists(2))
assert.False(t, segment.IsStatsLogExists(3))
assert.False(t, segment.IsStatsLogExists(0))
}

@ -1568,7 +1568,7 @@ func TestImportV2(t *testing.T) {

// list binlog failed
cm := mocks2.NewChunkManager(t)
cm.EXPECT().ListWithPrefix(mock.Anything, mock.Anything, mock.Anything).Return(nil, nil, mockErr)
cm.EXPECT().WalkWithPrefix(mock.Anything, mock.Anything, mock.Anything, mock.Anything).Return(mockErr)
s.meta = &meta{chunkManager: cm}
resp, err = s.ImportV2(ctx, &internalpb.ImportRequestInternal{
Files: []*internalpb.ImportFile{

@ -136,14 +136,8 @@ func (c *mockChunkmgr) MultiRead(ctx context.Context, filePaths []string) ([][]b
return nil, errNotImplErr
}

func (c *mockChunkmgr) ReadWithPrefix(ctx context.Context, prefix string) ([]string, [][]byte, error) {
// TODO
return nil, nil, errNotImplErr
}

func (c *mockChunkmgr) ListWithPrefix(ctx context.Context, prefix string, recursive bool) ([]string, []time.Time, error) {
// TODO
return nil, nil, errNotImplErr
func (c *mockChunkmgr) WalkWithPrefix(ctx context.Context, prefix string, recursive bool, walkFunc storage.ChunkObjectWalkFunc) error {
return errNotImplErr
}

func (c *mockChunkmgr) Mmap(ctx context.Context, filePath string) (*mmap.ReaderAt, error) {

@ -84,14 +84,7 @@ func CompressFieldBinlogs(fieldBinlogs []*datapb.FieldBinlog) error {
for _, binlog := range fieldBinlog.Binlogs {
logPath := binlog.GetLogPath()
if len(logPath) != 0 {
var logID int64
idx := strings.LastIndex(logPath, "/")
if idx == -1 {
return merr.WrapErrParameterInvalidMsg(fmt.Sprintf("invalid binlog path: %s", logPath))
}
var err error
logPathStr := logPath[(idx + 1):]
logID, err = strconv.ParseInt(logPathStr, 10, 64)
logID, err := GetLogIDFromBingLogPath(logPath)
if err != nil {
return err
}
@ -184,3 +177,19 @@ func buildLogPath(binlogType storage.BinlogType, collectionID, partitionID, segm
// should not happen
return "", merr.WrapErrParameterInvalidMsg("invalid binlog type")
}

// GetLogIDFromBingLogPath get log id from binlog path
func GetLogIDFromBingLogPath(logPath string) (int64, error) {
var logID int64
idx := strings.LastIndex(logPath, "/")
if idx == -1 {
return 0, merr.WrapErrParameterInvalidMsg(fmt.Sprintf("invalid binlog path: %s", logPath))
}
var err error
logPathStr := logPath[(idx + 1):]
logID, err = strconv.ParseInt(logPathStr, 10, 64)
if err != nil {
return 0, err
}
return logID, nil
}

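As a side note, a minimal sketch of how the extracted helper above can be called from within the same package; the path literal below is hypothetical and only illustrates the expected shape (a "/"-separated path whose last segment is a numeric log ID), it is not taken from this change.

// Illustrative sketch only, not part of the diff.
func exampleLogIDFromPath() {
	// hypothetical binlog path; real paths end with a numeric log ID segment
	logID, err := GetLogIDFromBingLogPath("insert_log/1/2/3/100/450")
	if err != nil {
		log.Warn("failed to parse log id", zap.Error(err))
		return
	}
	log.Info("parsed log id", zap.Int64("logID", logID)) // 450 for the path above
}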
@ -10,8 +10,6 @@ import (
mock "github.com/stretchr/testify/mock"

storage "github.com/milvus-io/milvus/internal/storage"

time "time"
)

// ChunkManager is an autogenerated mock type for the ChunkManager type
@ -80,71 +78,6 @@ func (_c *ChunkManager_Exist_Call) RunAndReturn(run func(context.Context, string
return _c
}

// ListWithPrefix provides a mock function with given fields: ctx, prefix, recursive
func (_m *ChunkManager) ListWithPrefix(ctx context.Context, prefix string, recursive bool) ([]string, []time.Time, error) {
ret := _m.Called(ctx, prefix, recursive)

var r0 []string
var r1 []time.Time
var r2 error
if rf, ok := ret.Get(0).(func(context.Context, string, bool) ([]string, []time.Time, error)); ok {
return rf(ctx, prefix, recursive)
}
if rf, ok := ret.Get(0).(func(context.Context, string, bool) []string); ok {
r0 = rf(ctx, prefix, recursive)
} else {
if ret.Get(0) != nil {
r0 = ret.Get(0).([]string)
}
}

if rf, ok := ret.Get(1).(func(context.Context, string, bool) []time.Time); ok {
r1 = rf(ctx, prefix, recursive)
} else {
if ret.Get(1) != nil {
r1 = ret.Get(1).([]time.Time)
}
}

if rf, ok := ret.Get(2).(func(context.Context, string, bool) error); ok {
r2 = rf(ctx, prefix, recursive)
} else {
r2 = ret.Error(2)
}

return r0, r1, r2
}

// ChunkManager_ListWithPrefix_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'ListWithPrefix'
type ChunkManager_ListWithPrefix_Call struct {
*mock.Call
}

// ListWithPrefix is a helper method to define mock.On call
// - ctx context.Context
// - prefix string
// - recursive bool
func (_e *ChunkManager_Expecter) ListWithPrefix(ctx interface{}, prefix interface{}, recursive interface{}) *ChunkManager_ListWithPrefix_Call {
return &ChunkManager_ListWithPrefix_Call{Call: _e.mock.On("ListWithPrefix", ctx, prefix, recursive)}
}

func (_c *ChunkManager_ListWithPrefix_Call) Run(run func(ctx context.Context, prefix string, recursive bool)) *ChunkManager_ListWithPrefix_Call {
_c.Call.Run(func(args mock.Arguments) {
run(args[0].(context.Context), args[1].(string), args[2].(bool))
})
return _c
}

func (_c *ChunkManager_ListWithPrefix_Call) Return(_a0 []string, _a1 []time.Time, _a2 error) *ChunkManager_ListWithPrefix_Call {
_c.Call.Return(_a0, _a1, _a2)
return _c
}

func (_c *ChunkManager_ListWithPrefix_Call) RunAndReturn(run func(context.Context, string, bool) ([]string, []time.Time, error)) *ChunkManager_ListWithPrefix_Call {
_c.Call.Return(run)
return _c
}

// Mmap provides a mock function with given fields: ctx, filePath
func (_m *ChunkManager) Mmap(ctx context.Context, filePath string) (*mmap.ReaderAt, error) {
ret := _m.Called(ctx, filePath)
@ -506,70 +439,6 @@ func (_c *ChunkManager_ReadAt_Call) RunAndReturn(run func(context.Context, strin
return _c
}

// ReadWithPrefix provides a mock function with given fields: ctx, prefix
func (_m *ChunkManager) ReadWithPrefix(ctx context.Context, prefix string) ([]string, [][]byte, error) {
ret := _m.Called(ctx, prefix)

var r0 []string
var r1 [][]byte
var r2 error
if rf, ok := ret.Get(0).(func(context.Context, string) ([]string, [][]byte, error)); ok {
return rf(ctx, prefix)
}
if rf, ok := ret.Get(0).(func(context.Context, string) []string); ok {
r0 = rf(ctx, prefix)
} else {
if ret.Get(0) != nil {
r0 = ret.Get(0).([]string)
}
}

if rf, ok := ret.Get(1).(func(context.Context, string) [][]byte); ok {
r1 = rf(ctx, prefix)
} else {
if ret.Get(1) != nil {
r1 = ret.Get(1).([][]byte)
}
}

if rf, ok := ret.Get(2).(func(context.Context, string) error); ok {
r2 = rf(ctx, prefix)
} else {
r2 = ret.Error(2)
}

return r0, r1, r2
}

// ChunkManager_ReadWithPrefix_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'ReadWithPrefix'
type ChunkManager_ReadWithPrefix_Call struct {
*mock.Call
}

// ReadWithPrefix is a helper method to define mock.On call
// - ctx context.Context
// - prefix string
func (_e *ChunkManager_Expecter) ReadWithPrefix(ctx interface{}, prefix interface{}) *ChunkManager_ReadWithPrefix_Call {
return &ChunkManager_ReadWithPrefix_Call{Call: _e.mock.On("ReadWithPrefix", ctx, prefix)}
}

func (_c *ChunkManager_ReadWithPrefix_Call) Run(run func(ctx context.Context, prefix string)) *ChunkManager_ReadWithPrefix_Call {
_c.Call.Run(func(args mock.Arguments) {
run(args[0].(context.Context), args[1].(string))
})
return _c
}

func (_c *ChunkManager_ReadWithPrefix_Call) Return(_a0 []string, _a1 [][]byte, _a2 error) *ChunkManager_ReadWithPrefix_Call {
_c.Call.Return(_a0, _a1, _a2)
return _c
}

func (_c *ChunkManager_ReadWithPrefix_Call) RunAndReturn(run func(context.Context, string) ([]string, [][]byte, error)) *ChunkManager_ReadWithPrefix_Call {
_c.Call.Return(run)
return _c
}

// Reader provides a mock function with given fields: ctx, filePath
func (_m *ChunkManager) Reader(ctx context.Context, filePath string) (storage.FileReader, error) {
ret := _m.Called(ctx, filePath)
@ -805,6 +674,51 @@ func (_c *ChunkManager_Size_Call) RunAndReturn(run func(context.Context, string)
return _c
}

// WalkWithPrefix provides a mock function with given fields: ctx, prefix, recursive, walkFunc
func (_m *ChunkManager) WalkWithPrefix(ctx context.Context, prefix string, recursive bool, walkFunc storage.ChunkObjectWalkFunc) error {
ret := _m.Called(ctx, prefix, recursive, walkFunc)

var r0 error
if rf, ok := ret.Get(0).(func(context.Context, string, bool, storage.ChunkObjectWalkFunc) error); ok {
r0 = rf(ctx, prefix, recursive, walkFunc)
} else {
r0 = ret.Error(0)
}

return r0
}

// ChunkManager_WalkWithPrefix_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'WalkWithPrefix'
type ChunkManager_WalkWithPrefix_Call struct {
*mock.Call
}

// WalkWithPrefix is a helper method to define mock.On call
// - ctx context.Context
// - prefix string
// - recursive bool
// - walkFunc storage.ChunkObjectWalkFunc
func (_e *ChunkManager_Expecter) WalkWithPrefix(ctx interface{}, prefix interface{}, recursive interface{}, walkFunc interface{}) *ChunkManager_WalkWithPrefix_Call {
return &ChunkManager_WalkWithPrefix_Call{Call: _e.mock.On("WalkWithPrefix", ctx, prefix, recursive, walkFunc)}
}

func (_c *ChunkManager_WalkWithPrefix_Call) Run(run func(ctx context.Context, prefix string, recursive bool, walkFunc storage.ChunkObjectWalkFunc)) *ChunkManager_WalkWithPrefix_Call {
_c.Call.Run(func(args mock.Arguments) {
run(args[0].(context.Context), args[1].(string), args[2].(bool), args[3].(storage.ChunkObjectWalkFunc))
})
return _c
}

func (_c *ChunkManager_WalkWithPrefix_Call) Return(_a0 error) *ChunkManager_WalkWithPrefix_Call {
_c.Call.Return(_a0)
return _c
}

func (_c *ChunkManager_WalkWithPrefix_Call) RunAndReturn(run func(context.Context, string, bool, storage.ChunkObjectWalkFunc) error) *ChunkManager_WalkWithPrefix_Call {
_c.Call.Return(run)
return _c
}

// Write provides a mock function with given fields: ctx, filePath, content
func (_m *ChunkManager) Write(ctx context.Context, filePath string, content []byte) error {
ret := _m.Called(ctx, filePath, content)

@ -793,7 +793,7 @@ func (sd *shardDelegator) maybeReloadPartitionStats(ctx context.Context, partIDs
idPath := metautil.JoinIDPath(colID, partID)
idPath = path.Join(idPath, sd.vchannelName)
statsPathPrefix := path.Join(sd.chunkManager.RootPath(), common.PartitionStatsPath, idPath)
filePaths, _, err := sd.chunkManager.ListWithPrefix(ctx, statsPathPrefix, true)
filePaths, _, err := storage.ListAllChunkWithPrefix(ctx, sd.chunkManager, statsPathPrefix, true)
if err != nil {
log.Error("Skip initializing partition stats for failing to list files with prefix",
zap.String("statsPathPrefix", statsPathPrefix))

@ -197,21 +197,20 @@ func (AzureObjectStorage *AzureObjectStorage) StatObject(ctx context.Context, bu
return *info.ContentLength, nil
}

func (AzureObjectStorage *AzureObjectStorage) ListObjects(ctx context.Context, bucketName string, prefix string, recursive bool) ([]string, []time.Time, error) {
var objectsKeys []string
var modTimes []time.Time
func (AzureObjectStorage *AzureObjectStorage) WalkWithObjects(ctx context.Context, bucketName string, prefix string, recursive bool, walkFunc ChunkObjectWalkFunc) error {
if recursive {
pager := AzureObjectStorage.Client.NewContainerClient(bucketName).NewListBlobsFlatPager(&azblob.ListBlobsFlatOptions{
Prefix: &prefix,
})
if pager.More() {
pageResp, err := pager.NextPage(context.Background())
pageResp, err := pager.NextPage(ctx)
if err != nil {
return []string{}, []time.Time{}, checkObjectStorageError(prefix, err)
return err
}
for _, blob := range pageResp.Segment.BlobItems {
objectsKeys = append(objectsKeys, *blob.Name)
modTimes = append(modTimes, *blob.Properties.LastModified)
if !walkFunc(&ChunkObjectInfo{FilePath: *blob.Name, ModifyTime: *blob.Properties.LastModified}) {
return nil
}
}
}
} else {
@ -219,21 +218,24 @@ func (AzureObjectStorage *AzureObjectStorage) ListObjects(ctx context.Context, b
Prefix: &prefix,
})
if pager.More() {
pageResp, err := pager.NextPage(context.Background())
pageResp, err := pager.NextPage(ctx)
if err != nil {
return []string{}, []time.Time{}, checkObjectStorageError(prefix, err)
return err
}

for _, blob := range pageResp.Segment.BlobItems {
objectsKeys = append(objectsKeys, *blob.Name)
modTimes = append(modTimes, *blob.Properties.LastModified)
if !walkFunc(&ChunkObjectInfo{FilePath: *blob.Name, ModifyTime: *blob.Properties.LastModified}) {
return nil
}
}
for _, blob := range pageResp.Segment.BlobPrefixes {
objectsKeys = append(objectsKeys, *blob.Name)
modTimes = append(modTimes, time.Now())
if !walkFunc(&ChunkObjectInfo{FilePath: *blob.Name, ModifyTime: time.Now()}) {
return nil
}
}
}
return objectsKeys, modTimes, nil
}
return nil
}

func (AzureObjectStorage *AzureObjectStorage) RemoveObject(ctx context.Context, bucketName, objectName string) error {

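For readers unfamiliar with the callback style introduced here: the walk stops as soon as the callback returns false, which is what makes early termination and pipelined consumption possible. A minimal caller sketch (not part of the diff; it only assumes the WalkWithObjects signature and ChunkObjectInfo type shown above) that collects at most maxKeys object keys:

// Illustrative sketch only: stop the walk once enough keys are collected.
func collectSomeKeys(ctx context.Context, objStorage *AzureObjectStorage, bucket, prefix string, maxKeys int) ([]string, error) {
	keys := make([]string, 0, maxKeys)
	err := objStorage.WalkWithObjects(ctx, bucket, prefix, true, func(obj *ChunkObjectInfo) bool {
		keys = append(keys, obj.FilePath)
		return len(keys) < maxKeys // returning false ends the walk early
	})
	return keys, err
}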
@ -124,7 +124,7 @@ func TestAzureObjectStorage(t *testing.T) {

for _, test := range loadWithPrefixTests {
t.Run(test.description, func(t *testing.T) {
gotk, _, err := testCM.ListObjects(ctx, config.bucketName, test.prefix, false)
gotk, _, err := listAllObjectsWithPrefixAtBucket(ctx, testCM, config.bucketName, test.prefix, false)
assert.NoError(t, err)
assert.Equal(t, len(test.expectedValue), len(gotk))
for _, key := range gotk {
@ -177,7 +177,7 @@ func TestAzureObjectStorage(t *testing.T) {

for _, test := range insertWithPrefixTests {
t.Run(fmt.Sprintf("prefix: %s, recursive: %t", test.prefix, test.recursive), func(t *testing.T) {
gotk, _, err := testCM.ListObjects(ctx, config.bucketName, test.prefix, test.recursive)
gotk, _, err := listAllObjectsWithPrefixAtBucket(ctx, testCM, config.bucketName, test.prefix, test.recursive)
assert.NoError(t, err)
assert.Equal(t, len(test.expectedValue), len(gotk))
for _, key := range gotk {

@ -49,9 +49,7 @@ func (f *ChunkManagerFactory) newChunkManager(ctx context.Context, engine string
switch engine {
case "local":
return NewLocalChunkManager(RootPath(f.config.rootPath)), nil
case "minio", "opendal":
return newMinioChunkManagerWithConfig(ctx, f.config)
case "remote":
case "remote", "minio", "opendal":
return NewRemoteChunkManager(ctx, f.config)
default:
return nil, errors.New("no chunk manager implemented with engine: " + engine)

@ -133,53 +133,58 @@ func (lcm *LocalChunkManager) MultiRead(ctx context.Context, filePaths []string)
return results, el
}

func (lcm *LocalChunkManager) ListWithPrefix(ctx context.Context, prefix string, recursive bool) ([]string, []time.Time, error) {
var filePaths []string
var modTimes []time.Time
func (lcm *LocalChunkManager) WalkWithPrefix(ctx context.Context, prefix string, recursive bool, walkFunc ChunkObjectWalkFunc) (err error) {
logger := log.With(zap.String("prefix", prefix), zap.Bool("recursive", recursive))
logger.Info("start walk through objects")
defer func() {
if err != nil {
logger.Warn("failed to walk through objects", zap.Error(err))
return
}
logger.Info("finish walk through objects")
}()

if recursive {
dir := filepath.Dir(prefix)
err := filepath.Walk(dir, func(filePath string, f os.FileInfo, err error) error {
return filepath.Walk(dir, func(filePath string, f os.FileInfo, err error) error {
if ctx.Err() != nil {
return ctx.Err()
}
if err != nil {
return err
}

if strings.HasPrefix(filePath, prefix) && !f.IsDir() {
filePaths = append(filePaths, filePath)
modTime, err := lcm.getModTime(filePath)
if err != nil {
return err
}
if !walkFunc(&ChunkObjectInfo{FilePath: filePath, ModifyTime: modTime}) {
return nil
}
}
return nil
})
if err != nil {
return nil, nil, err
}
for _, filePath := range filePaths {
modTime, err2 := lcm.getModTime(filePath)
if err2 != nil {
return filePaths, nil, err2
}
modTimes = append(modTimes, modTime)
}
return filePaths, modTimes, nil
}

globPaths, err := filepath.Glob(prefix + "*")
if err != nil {
return nil, nil, err
return err
}
filePaths = append(filePaths, globPaths...)
for _, filePath := range filePaths {
modTime, err2 := lcm.getModTime(filePath)
if err2 != nil {
return filePaths, nil, err2
}
modTimes = append(modTimes, modTime)
for _, filePath := range globPaths {
if ctx.Err() != nil {
return ctx.Err()
}

return filePaths, modTimes, nil
}

func (lcm *LocalChunkManager) ReadWithPrefix(ctx context.Context, prefix string) ([]string, [][]byte, error) {
filePaths, _, err := lcm.ListWithPrefix(ctx, prefix, true)
modTime, err := lcm.getModTime(filePath)
if err != nil {
return nil, nil, err
return err
}
result, err := lcm.MultiRead(ctx, filePaths)
return filePaths, result, err
if !walkFunc(&ChunkObjectInfo{FilePath: filePath, ModifyTime: modTime}) {
return nil
}
}
return nil
}

// ReadAt reads specific position data of local storage if exists.
@ -246,13 +251,17 @@ func (lcm *LocalChunkManager) RemoveWithPrefix(ctx context.Context, prefix strin
log.Warn(errMsg)
return merr.WrapErrParameterInvalidMsg(errMsg)
}

filePaths, _, err := lcm.ListWithPrefix(ctx, prefix, true)
var removeErr error
if err := lcm.WalkWithPrefix(ctx, prefix, true, func(chunkInfo *ChunkObjectInfo) bool {
err := lcm.MultiRemove(ctx, []string{chunkInfo.FilePath})
if err != nil {
removeErr = err
}
return true
}); err != nil {
return err
}

return lcm.MultiRemove(ctx, filePaths)
return removeErr
}

func (lcm *LocalChunkManager) getModTime(filepath string) (time.Time, error) {

@ -110,7 +110,7 @@ func TestLocalCM(t *testing.T) {

for _, test := range loadWithPrefixTests {
t.Run(test.description, func(t *testing.T) {
gotk, gotv, err := testCM.ReadWithPrefix(ctx, path.Join(localPath, testLoadRoot, test.prefix))
gotk, gotv, err := readAllChunkWithPrefix(ctx, testCM, path.Join(localPath, testLoadRoot, test.prefix))
assert.NoError(t, err)
assert.Equal(t, len(test.expectedValue), len(gotk))
assert.Equal(t, len(test.expectedValue), len(gotv))
@ -447,7 +447,7 @@ func TestLocalCM(t *testing.T) {
// localPath/testPrefix/a/b
// localPath/testPrefix/a/c
pathPrefix := path.Join(localPath, testPrefix, "a")
dirs, m, err := testCM.ListWithPrefix(ctx, pathPrefix, true)
dirs, m, err := ListAllChunkWithPrefix(ctx, testCM, pathPrefix, true)
assert.NoError(t, err)
assert.Equal(t, 2, len(dirs))
assert.Equal(t, 2, len(m))
@ -459,7 +459,7 @@ func TestLocalCM(t *testing.T) {
assert.NoError(t, err)

// no file returned
dirs, m, err = testCM.ListWithPrefix(ctx, pathPrefix, true)
dirs, m, err = ListAllChunkWithPrefix(ctx, testCM, pathPrefix, true)
assert.NoError(t, err)
assert.Equal(t, 0, len(dirs))
assert.Equal(t, 0, len(m))
@ -499,7 +499,7 @@ func TestLocalCM(t *testing.T) {
// localPath/testPrefix/abd
// localPath/testPrefix/bcd
testPrefix1 := path.Join(localPath, testPrefix)
dirs, mods, err := testCM.ListWithPrefix(ctx, testPrefix1+"/", false)
dirs, mods, err := ListAllChunkWithPrefix(ctx, testCM, testPrefix1+"/", false)
assert.NoError(t, err)
assert.Equal(t, 3, len(dirs))
assert.Equal(t, 3, len(mods))
@ -513,7 +513,7 @@ func TestLocalCM(t *testing.T) {
// localPath/testPrefix/abc/deg
// localPath/testPrefix/abd
// localPath/testPrefix/bcd
dirs, mods, err = testCM.ListWithPrefix(ctx, testPrefix1+"/", true)
dirs, mods, err = ListAllChunkWithPrefix(ctx, testCM, testPrefix1+"/", true)
assert.NoError(t, err)
assert.Equal(t, 4, len(dirs))
assert.Equal(t, 4, len(mods))
@ -527,7 +527,7 @@ func TestLocalCM(t *testing.T) {
// localPath/testPrefix/abc
// localPath/testPrefix/abd
testPrefix2 := path.Join(localPath, testPrefix, "a")
dirs, mods, err = testCM.ListWithPrefix(ctx, testPrefix2, false)
dirs, mods, err = ListAllChunkWithPrefix(ctx, testCM, testPrefix2, false)
assert.NoError(t, err)
assert.Equal(t, 2, len(dirs))
assert.Equal(t, 2, len(mods))
@ -539,7 +539,7 @@ func TestLocalCM(t *testing.T) {
// localPath/testPrefix/abc/def
// localPath/testPrefix/abc/deg
// localPath/testPrefix/abd
dirs, mods, err = testCM.ListWithPrefix(ctx, testPrefix2, true)
dirs, mods, err = ListAllChunkWithPrefix(ctx, testCM, testPrefix2, true)
assert.NoError(t, err)
assert.Equal(t, 3, len(dirs))
assert.Equal(t, 3, len(mods))
@ -555,7 +555,7 @@ func TestLocalCM(t *testing.T) {
// non-recursive find localPath/testPrefix
// return:
// localPath/testPrefix
dirs, mods, err = testCM.ListWithPrefix(ctx, testPrefix1, false)
dirs, mods, err = ListAllChunkWithPrefix(ctx, testCM, testPrefix1, false)
assert.NoError(t, err)
assert.Equal(t, 1, len(dirs))
assert.Equal(t, 1, len(mods))
@ -564,7 +564,7 @@ func TestLocalCM(t *testing.T) {
// recursive find localPath/testPrefix
// return:
// localPath/testPrefix/bcd
dirs, mods, err = testCM.ListWithPrefix(ctx, testPrefix1, true)
dirs, mods, err = ListAllChunkWithPrefix(ctx, testCM, testPrefix1, true)
assert.NoError(t, err)
assert.Equal(t, 1, len(dirs))
assert.Equal(t, 1, len(mods))
@ -573,7 +573,7 @@ func TestLocalCM(t *testing.T) {
// non-recursive find localPath/testPrefix/a*
// return:
// localPath/testPrefix/abc
dirs, mods, err = testCM.ListWithPrefix(ctx, testPrefix2, false)
dirs, mods, err = ListAllChunkWithPrefix(ctx, testCM, testPrefix2, false)
assert.NoError(t, err)
assert.Equal(t, 1, len(dirs))
assert.Equal(t, 1, len(mods))
@ -581,7 +581,7 @@ func TestLocalCM(t *testing.T) {

// recursive find localPath/testPrefix/a*
// no file returned
dirs, mods, err = testCM.ListWithPrefix(ctx, testPrefix2, true)
dirs, mods, err = ListAllChunkWithPrefix(ctx, testCM, testPrefix2, true)
assert.NoError(t, err)
assert.Equal(t, 0, len(dirs))
assert.Equal(t, 0, len(mods))
@ -593,7 +593,7 @@ func TestLocalCM(t *testing.T) {

// recursive find localPath/testPrefix
// no file returned
dirs, mods, err = testCM.ListWithPrefix(ctx, testPrefix1, true)
dirs, mods, err = ListAllChunkWithPrefix(ctx, testCM, testPrefix1, true)
assert.NoError(t, err)
assert.Equal(t, 0, len(dirs))
assert.Equal(t, 0, len(mods))
@ -601,10 +601,27 @@ func TestLocalCM(t *testing.T) {
// recursive find localPath/testPrefix
// return
// localPath/testPrefix
dirs, mods, err = testCM.ListWithPrefix(ctx, testPrefix1, false)
dirs, mods, err = ListAllChunkWithPrefix(ctx, testCM, testPrefix1, false)
assert.NoError(t, err)
assert.Equal(t, 1, len(dirs))
assert.Equal(t, 1, len(mods))
assert.Contains(t, dirs, filepath.Dir(key4))
})
}

func readAllChunkWithPrefix(ctx context.Context, manager ChunkManager, prefix string) ([]string, [][]byte, error) {
var paths []string
var contents [][]byte
if err := manager.WalkWithPrefix(ctx, prefix, true, func(object *ChunkObjectInfo) bool {
paths = append(paths, object.FilePath)
content, err := manager.Read(ctx, object.FilePath)
if err != nil {
return false
}
contents = append(contents, content)
return true
}); err != nil {
return nil, nil, err
}
return paths, contents, nil
}

@ -1,480 +0,0 @@
|
||||
// Licensed to the LF AI & Data foundation under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package storage
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"container/list"
|
||||
"context"
|
||||
"io"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/cockroachdb/errors"
|
||||
minio "github.com/minio/minio-go/v7"
|
||||
"go.uber.org/zap"
|
||||
"golang.org/x/exp/mmap"
|
||||
"golang.org/x/sync/errgroup"
|
||||
|
||||
"github.com/milvus-io/milvus/pkg/log"
|
||||
"github.com/milvus-io/milvus/pkg/metrics"
|
||||
"github.com/milvus-io/milvus/pkg/util/merr"
|
||||
"github.com/milvus-io/milvus/pkg/util/retry"
|
||||
"github.com/milvus-io/milvus/pkg/util/timerecord"
|
||||
)
|
||||
|
||||
var CheckBucketRetryAttempts uint = 20
|
||||
|
||||
// MinioChunkManager is responsible for read and write data stored in minio.
|
||||
type MinioChunkManager struct {
|
||||
*minio.Client
|
||||
|
||||
// ctx context.Context
|
||||
bucketName string
|
||||
rootPath string
|
||||
}
|
||||
|
||||
var _ ChunkManager = (*MinioChunkManager)(nil)
|
||||
|
||||
// NewMinioChunkManager create a new local manager object.
|
||||
// Deprecated: Do not call this directly! Use factory.NewPersistentStorageChunkManager instead.
|
||||
func NewMinioChunkManager(ctx context.Context, opts ...Option) (*MinioChunkManager, error) {
|
||||
c := newDefaultConfig()
|
||||
for _, opt := range opts {
|
||||
opt(c)
|
||||
}
|
||||
|
||||
return newMinioChunkManagerWithConfig(ctx, c)
|
||||
}
|
||||
|
||||
func newMinioChunkManagerWithConfig(ctx context.Context, c *config) (*MinioChunkManager, error) {
|
||||
minIOClient, err := newMinioClient(ctx, c)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
mcm := &MinioChunkManager{
|
||||
Client: minIOClient,
|
||||
bucketName: c.bucketName,
|
||||
}
|
||||
mcm.rootPath = mcm.normalizeRootPath(c.rootPath)
|
||||
log.Info("minio chunk manager init success.", zap.String("bucketname", c.bucketName), zap.String("root", mcm.RootPath()))
|
||||
return mcm, nil
|
||||
}
|
||||
|
||||
// normalizeRootPath
|
||||
func (mcm *MinioChunkManager) normalizeRootPath(rootPath string) string {
|
||||
// no leading "/"
|
||||
return strings.TrimLeft(rootPath, "/")
|
||||
}
|
||||
|
||||
// SetVar set the variable value of mcm
|
||||
func (mcm *MinioChunkManager) SetVar(bucketName string, rootPath string) {
|
||||
log.Info("minio chunkmanager ", zap.String("bucketName", bucketName), zap.String("rootpath", rootPath))
|
||||
mcm.bucketName = bucketName
|
||||
mcm.rootPath = rootPath
|
||||
}
|
||||
|
||||
// RootPath returns minio root path.
|
||||
func (mcm *MinioChunkManager) RootPath() string {
|
||||
return mcm.rootPath
|
||||
}
|
||||
|
||||
// Path returns the path of minio data if exists.
|
||||
func (mcm *MinioChunkManager) Path(ctx context.Context, filePath string) (string, error) {
|
||||
exist, err := mcm.Exist(ctx, filePath)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
if !exist {
|
||||
return "", merr.WrapErrIoKeyNotFound(filePath)
|
||||
}
|
||||
return filePath, nil
|
||||
}
|
||||
|
||||
// Reader returns the path of minio data if exists.
|
||||
func (mcm *MinioChunkManager) Reader(ctx context.Context, filePath string) (FileReader, error) {
|
||||
reader, err := mcm.getMinioObject(ctx, mcm.bucketName, filePath, minio.GetObjectOptions{})
|
||||
if err != nil {
|
||||
log.Warn("failed to get object", zap.String("bucket", mcm.bucketName), zap.String("path", filePath), zap.Error(err))
|
||||
return nil, err
|
||||
}
|
||||
return reader, nil
|
||||
}
|
||||
|
||||
func (mcm *MinioChunkManager) Size(ctx context.Context, filePath string) (int64, error) {
|
||||
objectInfo, err := mcm.statMinioObject(ctx, mcm.bucketName, filePath, minio.StatObjectOptions{})
|
||||
if err != nil {
|
||||
log.Warn("failed to stat object", zap.String("bucket", mcm.bucketName), zap.String("path", filePath), zap.Error(err))
|
||||
return 0, err
|
||||
}
|
||||
|
||||
return objectInfo.Size, nil
|
||||
}
|
||||
|
||||
// Write writes the data to minio storage.
|
||||
func (mcm *MinioChunkManager) Write(ctx context.Context, filePath string, content []byte) error {
|
||||
_, err := mcm.putMinioObject(ctx, mcm.bucketName, filePath, bytes.NewReader(content), int64(len(content)), minio.PutObjectOptions{})
|
||||
if err != nil {
|
||||
log.Warn("failed to put object", zap.String("bucket", mcm.bucketName), zap.String("path", filePath), zap.Error(err))
|
||||
return err
|
||||
}
|
||||
|
||||
metrics.PersistentDataKvSize.WithLabelValues(metrics.DataPutLabel).Observe(float64(len(content)))
|
||||
return nil
|
||||
}
|
||||
|
||||
// MultiWrite saves multiple objects, the path is the key of @kvs.
|
||||
// The object value is the value of @kvs.
|
||||
func (mcm *MinioChunkManager) MultiWrite(ctx context.Context, kvs map[string][]byte) error {
|
||||
errors := make([]error, 0, len(kvs))
|
||||
for key, value := range kvs {
|
||||
err := mcm.Write(ctx, key, value)
|
||||
errors = append(errors, err)
|
||||
}
|
||||
return merr.Combine(errors...)
|
||||
}
|
||||
|
||||
// Exist checks whether chunk is saved to minio storage.
|
||||
func (mcm *MinioChunkManager) Exist(ctx context.Context, filePath string) (bool, error) {
|
||||
_, err := mcm.statMinioObject(ctx, mcm.bucketName, filePath, minio.StatObjectOptions{})
|
||||
if err != nil {
|
||||
if errors.Is(err, merr.ErrIoKeyNotFound) {
|
||||
return false, nil
|
||||
}
|
||||
log.Warn("failed to stat object",
|
||||
zap.String("bucket", mcm.bucketName),
|
||||
zap.String("path", filePath),
|
||||
zap.Error(err),
|
||||
)
|
||||
return false, err
|
||||
}
|
||||
return true, nil
|
||||
}
|
||||
|
||||
// Read reads the minio storage data if exists.
|
||||
func (mcm *MinioChunkManager) Read(ctx context.Context, filePath string) ([]byte, error) {
|
||||
var data []byte
|
||||
err := retry.Do(ctx, func() error {
|
||||
start := time.Now()
|
||||
object, err := mcm.getMinioObject(ctx, mcm.bucketName, filePath, minio.GetObjectOptions{})
|
||||
if err != nil {
|
||||
log.Warn("failed to get object", zap.String("bucket", mcm.bucketName), zap.String("path", filePath), zap.Error(err))
|
||||
return err
|
||||
}
|
||||
defer object.Close()
|
||||
|
||||
// Prefetch object data
|
||||
var empty []byte
|
||||
_, err = object.Read(empty)
|
||||
err = checkObjectStorageError(filePath, err)
|
||||
if err != nil {
|
||||
log.Warn("failed to read object", zap.String("path", filePath), zap.Error(err))
|
||||
return err
|
||||
}
|
||||
|
||||
objectInfo, err := object.Stat()
|
||||
err = checkObjectStorageError(filePath, err)
|
||||
if err != nil {
|
||||
log.Warn("failed to stat object", zap.String("bucket", mcm.bucketName), zap.String("path", filePath), zap.Error(err))
|
||||
return err
|
||||
}
|
||||
|
||||
data, err = Read(object, objectInfo.Size)
|
||||
err = checkObjectStorageError(filePath, err)
|
||||
if err != nil {
|
||||
log.Warn("failed to read object", zap.String("bucket", mcm.bucketName), zap.String("path", filePath), zap.Error(err))
|
||||
return err
|
||||
}
|
||||
metrics.PersistentDataKvSize.WithLabelValues(metrics.DataGetLabel).Observe(float64(objectInfo.Size))
|
||||
metrics.PersistentDataRequestLatency.WithLabelValues(metrics.DataGetLabel).Observe(float64(time.Since(start).Milliseconds()))
|
||||
return nil
|
||||
}, retry.Attempts(3), retry.RetryErr(merr.IsRetryableErr))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return data, nil
|
||||
}
|
||||
|
||||
func (mcm *MinioChunkManager) MultiRead(ctx context.Context, keys []string) ([][]byte, error) {
|
||||
errors := make([]error, 0)
|
||||
var objectsValues [][]byte
|
||||
for _, key := range keys {
|
||||
objectValue, err := mcm.Read(ctx, key)
|
||||
if err != nil {
|
||||
errors = append(errors, err)
|
||||
}
|
||||
objectsValues = append(objectsValues, objectValue)
|
||||
}
|
||||
|
||||
return objectsValues, merr.Combine(errors...)
|
||||
}
|
||||
|
||||
func (mcm *MinioChunkManager) ReadWithPrefix(ctx context.Context, prefix string) ([]string, [][]byte, error) {
|
||||
objectsKeys, _, err := mcm.ListWithPrefix(ctx, prefix, true)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
objectsValues, err := mcm.MultiRead(ctx, objectsKeys)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
|
||||
return objectsKeys, objectsValues, nil
|
||||
}
|
||||
|
||||
func (mcm *MinioChunkManager) Mmap(ctx context.Context, filePath string) (*mmap.ReaderAt, error) {
|
||||
return nil, merr.WrapErrServiceInternal("mmap not supported for MinIO chunk manager")
|
||||
}
|
||||
|
||||
// ReadAt reads specific position data of minio storage if exists.
|
||||
func (mcm *MinioChunkManager) ReadAt(ctx context.Context, filePath string, off int64, length int64) ([]byte, error) {
|
||||
if off < 0 || length < 0 {
|
||||
return nil, io.EOF
|
||||
}
|
||||
|
||||
start := time.Now()
|
||||
opts := minio.GetObjectOptions{}
|
||||
err := opts.SetRange(off, off+length-1)
|
||||
if err != nil {
|
||||
log.Warn("failed to set range", zap.String("bucket", mcm.bucketName), zap.String("path", filePath), zap.Error(err))
|
||||
return nil, merr.WrapErrParameterInvalidMsg("invalid range while reading %s: %v", filePath, err)
|
||||
}
|
||||
|
||||
object, err := mcm.getMinioObject(ctx, mcm.bucketName, filePath, opts)
|
||||
if err != nil {
|
||||
log.Warn("failed to get object", zap.String("bucket", mcm.bucketName), zap.String("path", filePath), zap.Error(err))
|
||||
return nil, err
|
||||
}
|
||||
defer object.Close()
|
||||
|
||||
data, err := Read(object, length)
|
||||
if err != nil {
|
||||
err = checkObjectStorageError(filePath, err)
|
||||
log.Warn("failed to read object", zap.String("bucket", mcm.bucketName), zap.String("path", filePath), zap.Error(err))
|
||||
return nil, err
|
||||
}
|
||||
metrics.PersistentDataKvSize.WithLabelValues(metrics.DataGetLabel).Observe(float64(length))
|
||||
metrics.PersistentDataRequestLatency.WithLabelValues(metrics.DataGetLabel).Observe(float64(time.Since(start).Milliseconds()))
|
||||
return data, nil
|
||||
}
|
||||
|
||||
// Remove deletes an object with @key.
|
||||
func (mcm *MinioChunkManager) Remove(ctx context.Context, filePath string) error {
|
||||
err := mcm.removeMinioObject(ctx, mcm.bucketName, filePath, minio.RemoveObjectOptions{})
|
||||
if err != nil {
|
||||
log.Warn("failed to remove object", zap.String("bucket", mcm.bucketName), zap.String("path", filePath), zap.Error(err))
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// MultiRemove deletes a objects with @keys.
|
||||
func (mcm *MinioChunkManager) MultiRemove(ctx context.Context, keys []string) error {
|
||||
var el error
|
||||
for _, key := range keys {
|
||||
err := mcm.Remove(ctx, key)
|
||||
if err != nil {
|
||||
el = merr.Combine(el, errors.Wrapf(err, "failed to remove %s", key))
|
||||
}
|
||||
}
|
||||
return el
|
||||
}
|
||||
|
||||
// RemoveWithPrefix removes all objects with the same prefix @prefix from minio.
|
||||
func (mcm *MinioChunkManager) RemoveWithPrefix(ctx context.Context, prefix string) error {
|
||||
objects := mcm.listMinioObjects(ctx, mcm.bucketName, minio.ListObjectsOptions{Prefix: prefix, Recursive: true})
|
||||
i := 0
|
||||
maxGoroutine := 10
|
||||
removeKeys := make([]string, 0, len(objects))
|
||||
for object := range objects {
|
||||
if object.Err != nil {
|
||||
return object.Err
|
||||
}
|
||||
removeKeys = append(removeKeys, object.Key)
|
||||
}
|
||||
for i < len(removeKeys) {
|
||||
runningGroup, groupCtx := errgroup.WithContext(ctx)
|
||||
for j := 0; j < maxGoroutine && i < len(removeKeys); j++ {
|
||||
key := removeKeys[i]
|
||||
runningGroup.Go(func() error {
|
||||
err := mcm.removeMinioObject(groupCtx, mcm.bucketName, key, minio.RemoveObjectOptions{})
|
||||
if err != nil {
|
||||
log.Warn("failed to remove object", zap.String("path", key), zap.Error(err))
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
})
|
||||
i++
|
||||
}
|
||||
if err := runningGroup.Wait(); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// ListWithPrefix returns objects with provided prefix.
|
||||
// by default, if `recursive`=false, list object with return object with path under save level
|
||||
// say minio has followinng objects: [a, ab, a/b, ab/c]
|
||||
// calling `ListWithPrefix` with `prefix` = a && `recursive` = false will only returns [a, ab]
|
||||
// If caller needs all objects without level limitation, `recursive` shall be true.
|
||||
func (mcm *MinioChunkManager) ListWithPrefix(ctx context.Context, prefix string, recursive bool) ([]string, []time.Time, error) {
|
||||
// cannot use ListObjects(ctx, bucketName, Opt{Prefix:prefix, Recursive:true})
|
||||
// if minio has lots of objects under the provided path
|
||||
// recursive = true may timeout during the recursive browsing the objects.
|
||||
// See also: https://github.com/milvus-io/milvus/issues/19095
|
||||
|
||||
var objectsKeys []string
|
||||
var modTimes []time.Time
|
||||
|
||||
tasks := list.New()
|
||||
tasks.PushBack(prefix)
|
||||
for tasks.Len() > 0 {
|
||||
e := tasks.Front()
|
||||
pre := e.Value.(string)
|
||||
tasks.Remove(e)
|
||||
|
||||
// TODO add concurrent call if performance matters
|
||||
// only return current level per call
|
||||
objects := mcm.listMinioObjects(ctx, mcm.bucketName, minio.ListObjectsOptions{Prefix: pre, Recursive: false})
|
||||
|
||||
for object := range objects {
|
||||
if object.Err != nil {
|
||||
log.Warn("failed to list with prefix", zap.String("bucket", mcm.bucketName), zap.String("prefix", prefix), zap.Error(object.Err))
|
||||
return nil, nil, object.Err
|
||||
}
|
||||
|
||||
// with tailing "/", object is a "directory"
|
||||
if strings.HasSuffix(object.Key, "/") && recursive {
|
||||
// enqueue when recursive is true
|
||||
if object.Key != pre {
|
||||
tasks.PushBack(object.Key)
|
||||
}
|
||||
continue
|
||||
}
|
||||
objectsKeys = append(objectsKeys, object.Key)
|
||||
modTimes = append(modTimes, object.LastModified)
|
||||
}
|
||||
}
|
||||
|
||||
return objectsKeys, modTimes, nil
|
||||
}
|
||||
|
||||
// Learn from file.ReadFile
|
||||
func Read(r io.Reader, size int64) ([]byte, error) {
|
||||
data := make([]byte, 0, size)
|
||||
for {
|
||||
n, err := r.Read(data[len(data):cap(data)])
|
||||
data = data[:len(data)+n]
|
||||
if err != nil {
|
||||
if err == io.EOF {
|
||||
err = nil
|
||||
}
|
||||
return data, err
|
||||
}
|
||||
if len(data) == cap(data) {
|
||||
return data, nil
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (mcm *MinioChunkManager) getMinioObject(ctx context.Context, bucketName, objectName string,
|
||||
opts minio.GetObjectOptions,
|
||||
) (*minio.Object, error) {
|
||||
start := timerecord.NewTimeRecorder("getMinioObject")
|
||||
|
||||
reader, err := mcm.Client.GetObject(ctx, bucketName, objectName, opts)
|
||||
metrics.PersistentDataOpCounter.WithLabelValues(metrics.DataGetLabel, metrics.TotalLabel).Inc()
|
||||
if err != nil {
|
||||
metrics.PersistentDataOpCounter.WithLabelValues(metrics.DataGetLabel, metrics.FailLabel).Inc()
|
||||
return nil, checkObjectStorageError(objectName, err)
|
||||
}
|
||||
if reader == nil {
|
||||
metrics.PersistentDataOpCounter.WithLabelValues(metrics.DataGetLabel, metrics.FailLabel).Inc()
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
metrics.PersistentDataRequestLatency.WithLabelValues(metrics.DataGetLabel).Observe(float64(start.ElapseSpan().Milliseconds()))
|
||||
metrics.PersistentDataOpCounter.WithLabelValues(metrics.DataGetLabel, metrics.SuccessLabel).Inc()
|
||||
return reader, nil
|
||||
}
|
||||
|
||||
func (mcm *MinioChunkManager) putMinioObject(ctx context.Context, bucketName, objectName string, reader io.Reader, objectSize int64,
|
||||
opts minio.PutObjectOptions,
|
||||
) (minio.UploadInfo, error) {
|
||||
start := timerecord.NewTimeRecorder("putMinioObject")
|
||||
|
||||
info, err := mcm.Client.PutObject(ctx, bucketName, objectName, reader, objectSize, opts)
|
||||
metrics.PersistentDataOpCounter.WithLabelValues(metrics.DataPutLabel, metrics.TotalLabel).Inc()
|
||||
if err != nil {
|
||||
metrics.PersistentDataOpCounter.WithLabelValues(metrics.MetaPutLabel, metrics.FailLabel).Inc()
|
||||
return info, checkObjectStorageError(objectName, err)
|
||||
}
|
||||
|
||||
metrics.PersistentDataRequestLatency.WithLabelValues(metrics.DataPutLabel).Observe(float64(start.ElapseSpan().Milliseconds()))
|
||||
metrics.PersistentDataOpCounter.WithLabelValues(metrics.MetaPutLabel, metrics.SuccessLabel).Inc()
|
||||
return info, nil
|
||||
}
|
||||
|
||||
func (mcm *MinioChunkManager) statMinioObject(ctx context.Context, bucketName, objectName string,
|
||||
opts minio.StatObjectOptions,
|
||||
) (minio.ObjectInfo, error) {
|
||||
start := timerecord.NewTimeRecorder("statMinioObject")
|
||||
|
||||
info, err := mcm.Client.StatObject(ctx, bucketName, objectName, opts)
|
||||
metrics.PersistentDataOpCounter.WithLabelValues(metrics.DataStatLabel, metrics.TotalLabel).Inc()
|
||||
if err != nil {
|
||||
metrics.PersistentDataOpCounter.WithLabelValues(metrics.DataStatLabel, metrics.FailLabel).Inc()
|
||||
err = checkObjectStorageError(objectName, err)
|
||||
return info, err
|
||||
}
|
||||
|
||||
metrics.PersistentDataRequestLatency.WithLabelValues(metrics.DataStatLabel).Observe(float64(start.ElapseSpan().Milliseconds()))
|
||||
metrics.PersistentDataOpCounter.WithLabelValues(metrics.DataStatLabel, metrics.SuccessLabel).Inc()
|
||||
return info, nil
|
||||
}
|
||||
|
||||
func (mcm *MinioChunkManager) listMinioObjects(ctx context.Context, bucketName string,
|
||||
opts minio.ListObjectsOptions,
|
||||
) <-chan minio.ObjectInfo {
|
||||
start := timerecord.NewTimeRecorder("listMinioObjects")
|
||||
|
||||
res := mcm.Client.ListObjects(ctx, bucketName, opts)
|
||||
metrics.PersistentDataRequestLatency.WithLabelValues(metrics.DataListLabel).Observe(float64(start.ElapseSpan().Milliseconds()))
|
||||
metrics.PersistentDataOpCounter.WithLabelValues(metrics.DataListLabel, metrics.TotalLabel).Inc()
|
||||
metrics.PersistentDataOpCounter.WithLabelValues(metrics.DataListLabel, metrics.SuccessLabel).Inc()
|
||||
|
||||
return res
|
||||
}
|
||||
|
||||
func (mcm *MinioChunkManager) removeMinioObject(ctx context.Context, bucketName, objectName string,
|
||||
opts minio.RemoveObjectOptions,
|
||||
) error {
|
||||
start := timerecord.NewTimeRecorder("removeMinioObject")
|
||||
|
||||
err := mcm.Client.RemoveObject(ctx, bucketName, objectName, opts)
|
||||
metrics.PersistentDataOpCounter.WithLabelValues(metrics.DataRemoveLabel, metrics.TotalLabel).Inc()
|
||||
if err != nil {
|
||||
metrics.PersistentDataOpCounter.WithLabelValues(metrics.DataRemoveLabel, metrics.FailLabel).Inc()
|
||||
return checkObjectStorageError(objectName, err)
|
||||
}
|
||||
|
||||
metrics.PersistentDataRequestLatency.WithLabelValues(metrics.DataRemoveLabel).Observe(float64(start.ElapseSpan().Milliseconds()))
|
||||
metrics.PersistentDataOpCounter.WithLabelValues(metrics.DataRemoveLabel, metrics.SuccessLabel).Inc()
|
||||
return nil
|
||||
}
|
@ -1,633 +0,0 @@
|
||||
// Licensed to the LF AI & Data foundation under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package storage
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"io"
|
||||
"math/rand"
|
||||
"path"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/cockroachdb/errors"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
"github.com/milvus-io/milvus/pkg/util/merr"
|
||||
)
|
||||
|
||||
// TODO: NewMinioChunkManager is deprecated. Rewrite this unittest.
|
||||
func newMinIOChunkManager(ctx context.Context, bucketName string, rootPath string) (*MinioChunkManager, error) {
|
||||
endPoint := getMinioAddress()
|
||||
accessKeyID := Params.MinioCfg.AccessKeyID.GetValue()
|
||||
secretAccessKey := Params.MinioCfg.SecretAccessKey.GetValue()
|
||||
useSSL := Params.MinioCfg.UseSSL.GetAsBool()
|
||||
sslCACert := Params.MinioCfg.SslCACert.GetValue()
|
||||
client, err := NewMinioChunkManager(ctx,
|
||||
RootPath(rootPath),
|
||||
Address(endPoint),
|
||||
AccessKeyID(accessKeyID),
|
||||
SecretAccessKeyID(secretAccessKey),
|
||||
UseSSL(useSSL),
|
||||
SslCACert(sslCACert),
|
||||
BucketName(bucketName),
|
||||
UseIAM(false),
|
||||
CloudProvider("aws"),
|
||||
IAMEndpoint(""),
|
||||
CreateBucket(true),
|
||||
UseVirtualHost(false),
|
||||
Region(""),
|
||||
)
|
||||
return client, err
|
||||
}
|
||||
|
||||
func getMinioAddress() string {
|
||||
minioHost := Params.MinioCfg.Address.GetValue()
|
||||
if strings.Contains(minioHost, ":") {
|
||||
return minioHost
|
||||
}
|
||||
port := Params.MinioCfg.Port.GetValue()
|
||||
return minioHost + ":" + port
|
||||
}
|
||||
|
||||
func TestMinIOCMFail(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
accessKeyID := Params.MinioCfg.AccessKeyID.GetValue()
|
||||
secretAccessKey := Params.MinioCfg.SecretAccessKey.GetValue()
|
||||
useSSL := Params.MinioCfg.UseSSL.GetAsBool()
|
||||
sslCACert := Params.MinioCfg.SslCACert.GetValue()
|
||||
client, err := NewMinioChunkManager(ctx,
|
||||
Address("9.9.9.9:invalid"),
|
||||
AccessKeyID(accessKeyID),
|
||||
SecretAccessKeyID(secretAccessKey),
|
||||
UseSSL(useSSL),
|
||||
SslCACert(sslCACert),
|
||||
BucketName("test"),
|
||||
CreateBucket(true),
|
||||
)
|
||||
assert.Error(t, err)
|
||||
assert.Nil(t, client)
|
||||
}
|
||||
|
||||
func TestMinIOCM(t *testing.T) {
|
||||
testBucket := Params.MinioCfg.BucketName.GetValue()
|
||||
|
||||
configRoot := Params.MinioCfg.RootPath.GetValue()
|
||||
|
||||
testMinIOKVRoot := path.Join(configRoot, fmt.Sprintf("minio-ut-%d", rand.Int()))
|
||||
|
||||
t.Run("test load", func(t *testing.T) {
|
||||
testLoadRoot := path.Join(testMinIOKVRoot, "test_load")
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
defer cancel()
|
||||
|
||||
testCM, err := newMinIOChunkManager(ctx, testBucket, testLoadRoot)
|
||||
require.NoError(t, err)
|
||||
defer testCM.RemoveWithPrefix(ctx, testLoadRoot)
|
||||
|
||||
assert.Equal(t, testLoadRoot, testCM.RootPath())
|
||||
|
||||
prepareTests := []struct {
|
||||
key string
|
||||
value []byte
|
||||
}{
|
||||
{"abc", []byte("123")},
|
||||
{"abcd", []byte("1234")},
|
||||
{"key_1", []byte("111")},
|
||||
{"key_2", []byte("222")},
|
||||
{"key_3", []byte("333")},
|
||||
}
|
||||
|
||||
for _, test := range prepareTests {
|
||||
err = testCM.Write(ctx, path.Join(testLoadRoot, test.key), test.value)
|
||||
require.NoError(t, err)
|
||||
}
|
||||
|
||||
loadTests := []struct {
|
||||
isvalid bool
|
||||
loadKey string
|
||||
expectedValue []byte
|
||||
|
||||
description string
|
||||
}{
|
||||
{true, "abc", []byte("123"), "load valid key abc"},
|
||||
{true, "abcd", []byte("1234"), "load valid key abcd"},
|
||||
{true, "key_1", []byte("111"), "load valid key key_1"},
|
||||
{true, "key_2", []byte("222"), "load valid key key_2"},
|
||||
{true, "key_3", []byte("333"), "load valid key key_3"},
|
||||
{false, "key_not_exist", []byte(""), "load invalid key key_not_exist"},
|
||||
{false, "/", []byte(""), "load leading slash"},
|
||||
}
|
||||
|
||||
for _, test := range loadTests {
|
||||
t.Run(test.description, func(t *testing.T) {
|
||||
if test.isvalid {
|
||||
got, err := testCM.Read(ctx, path.Join(testLoadRoot, test.loadKey))
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, test.expectedValue, got)
|
||||
} else {
|
||||
if test.loadKey == "/" {
|
||||
got, err := testCM.Read(ctx, test.loadKey)
|
||||
assert.Error(t, err)
|
||||
assert.Empty(t, got)
|
||||
return
|
||||
}
|
||||
got, err := testCM.Read(ctx, path.Join(testLoadRoot, test.loadKey))
|
||||
assert.Error(t, err)
|
||||
assert.Empty(t, got)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
loadWithPrefixTests := []struct {
|
||||
isvalid bool
|
||||
prefix string
|
||||
expectedValue [][]byte
|
||||
|
||||
description string
|
||||
}{
|
||||
{true, "abc", [][]byte{[]byte("123"), []byte("1234")}, "load with valid prefix abc"},
|
||||
{true, "key_", [][]byte{[]byte("111"), []byte("222"), []byte("333")}, "load with valid prefix key_"},
|
||||
{true, "prefix", [][]byte{}, "load with valid but not exist prefix prefix"},
|
||||
}
|
||||
|
||||
for _, test := range loadWithPrefixTests {
|
||||
t.Run(test.description, func(t *testing.T) {
|
||||
gotk, gotv, err := testCM.ReadWithPrefix(ctx, path.Join(testLoadRoot, test.prefix))
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, len(test.expectedValue), len(gotk))
|
||||
assert.Equal(t, len(test.expectedValue), len(gotv))
|
||||
assert.ElementsMatch(t, test.expectedValue, gotv)
|
||||
})
|
||||
}
|
||||
|
||||
multiLoadTests := []struct {
|
||||
isvalid bool
|
||||
multiKeys []string
|
||||
|
||||
expectedValue [][]byte
|
||||
description string
|
||||
}{
|
||||
{false, []string{"key_1", "key_not_exist"}, [][]byte{[]byte("111"), nil}, "multiload 1 exist 1 not"},
|
||||
{true, []string{"abc", "key_3"}, [][]byte{[]byte("123"), []byte("333")}, "multiload 2 exist"},
|
||||
}
|
||||
|
||||
for _, test := range multiLoadTests {
|
||||
t.Run(test.description, func(t *testing.T) {
|
||||
for i := range test.multiKeys {
|
||||
test.multiKeys[i] = path.Join(testLoadRoot, test.multiKeys[i])
|
||||
}
|
||||
if test.isvalid {
|
||||
got, err := testCM.MultiRead(ctx, test.multiKeys)
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, test.expectedValue, got)
|
||||
} else {
|
||||
got, err := testCM.MultiRead(ctx, test.multiKeys)
|
||||
assert.Error(t, err)
|
||||
assert.Equal(t, test.expectedValue, got)
|
||||
}
|
||||
})
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("test MultiSave", func(t *testing.T) {
|
||||
testMultiSaveRoot := path.Join(testMinIOKVRoot, "test_multisave")
|
||||
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
defer cancel()
|
||||
|
||||
testCM, err := newMinIOChunkManager(ctx, testBucket, testMultiSaveRoot)
|
||||
assert.NoError(t, err)
|
||||
defer testCM.RemoveWithPrefix(ctx, testMultiSaveRoot)
|
||||
|
||||
err = testCM.Write(ctx, path.Join(testMultiSaveRoot, "key_1"), []byte("111"))
|
||||
assert.NoError(t, err)
|
||||
|
||||
kvs := map[string][]byte{
|
||||
path.Join(testMultiSaveRoot, "key_1"): []byte("123"),
|
||||
path.Join(testMultiSaveRoot, "key_2"): []byte("456"),
|
||||
}
|
||||
|
||||
err = testCM.MultiWrite(ctx, kvs)
|
||||
assert.NoError(t, err)
|
||||
|
||||
val, err := testCM.Read(ctx, path.Join(testMultiSaveRoot, "key_1"))
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, []byte("123"), val)
|
||||
})
|
||||
|
||||
t.Run("test Remove", func(t *testing.T) {
|
||||
testRemoveRoot := path.Join(testMinIOKVRoot, "test_remove")
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
defer cancel()
|
||||
|
||||
testCM, err := newMinIOChunkManager(ctx, testBucket, testRemoveRoot)
|
||||
assert.NoError(t, err)
|
||||
defer testCM.RemoveWithPrefix(ctx, testRemoveRoot)
|
||||
|
||||
prepareTests := []struct {
|
||||
k string
|
||||
v []byte
|
||||
}{
|
||||
{"key_1", []byte("123")},
|
||||
{"key_2", []byte("456")},
|
||||
{"mkey_1", []byte("111")},
|
||||
{"mkey_2", []byte("222")},
|
||||
{"mkey_3", []byte("333")},
|
||||
{"key_prefix_1", []byte("111")},
|
||||
{"key_prefix_2", []byte("222")},
|
||||
{"key_prefix_3", []byte("333")},
|
||||
}
|
||||
|
||||
for _, test := range prepareTests {
|
||||
k := path.Join(testRemoveRoot, test.k)
|
||||
err = testCM.Write(ctx, k, test.v)
|
||||
require.NoError(t, err)
|
||||
}
|
||||
|
||||
removeTests := []struct {
|
||||
removeKey string
|
||||
valueBeforeRemove []byte
|
||||
|
||||
description string
|
||||
}{
|
||||
{"key_1", []byte("123"), "remove key_1"},
|
||||
{"key_2", []byte("456"), "remove key_2"},
|
||||
}
|
||||
|
||||
for _, test := range removeTests {
|
||||
t.Run(test.description, func(t *testing.T) {
|
||||
k := path.Join(testRemoveRoot, test.removeKey)
|
||||
v, err := testCM.Read(ctx, k)
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, test.valueBeforeRemove, v)
|
||||
|
||||
err = testCM.Remove(ctx, k)
|
||||
assert.NoError(t, err)
|
||||
|
||||
v, err = testCM.Read(ctx, k)
|
||||
require.Error(t, err)
|
||||
require.Empty(t, v)
|
||||
})
|
||||
}
|
||||
|
||||
multiRemoveTest := []string{
|
||||
path.Join(testRemoveRoot, "mkey_1"),
|
||||
path.Join(testRemoveRoot, "mkey_2"),
|
||||
path.Join(testRemoveRoot, "mkey_3"),
|
||||
}
|
||||
|
||||
lv, err := testCM.MultiRead(ctx, multiRemoveTest)
|
||||
require.NoError(t, err)
|
||||
require.ElementsMatch(t, [][]byte{[]byte("111"), []byte("222"), []byte("333")}, lv)
|
||||
|
||||
err = testCM.MultiRemove(ctx, multiRemoveTest)
|
||||
assert.NoError(t, err)
|
||||
|
||||
for _, k := range multiRemoveTest {
|
||||
v, err := testCM.Read(ctx, k)
|
||||
assert.Error(t, err)
|
||||
assert.Empty(t, v)
|
||||
}
|
||||
|
||||
removeWithPrefixTest := []string{
|
||||
path.Join(testRemoveRoot, "key_prefix_1"),
|
||||
path.Join(testRemoveRoot, "key_prefix_2"),
|
||||
path.Join(testRemoveRoot, "key_prefix_3"),
|
||||
}
|
||||
removePrefix := path.Join(testRemoveRoot, "key_prefix")
|
||||
|
||||
lv, err = testCM.MultiRead(ctx, removeWithPrefixTest)
|
||||
require.NoError(t, err)
|
||||
require.ElementsMatch(t, [][]byte{[]byte("111"), []byte("222"), []byte("333")}, lv)
|
||||
|
||||
err = testCM.RemoveWithPrefix(ctx, removePrefix)
|
||||
assert.NoError(t, err)
|
||||
|
||||
for _, k := range removeWithPrefixTest {
|
||||
v, err := testCM.Read(ctx, k)
|
||||
assert.Error(t, err)
|
||||
assert.Empty(t, v)
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("test ReadAt", func(t *testing.T) {
|
||||
testLoadPartialRoot := path.Join(testMinIOKVRoot, "load_partial")
|
||||
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
defer cancel()
|
||||
|
||||
testCM, err := newMinIOChunkManager(ctx, testBucket, testLoadPartialRoot)
|
||||
require.NoError(t, err)
|
||||
defer testCM.RemoveWithPrefix(ctx, testLoadPartialRoot)
|
||||
|
||||
key := path.Join(testLoadPartialRoot, "TestMinIOKV_LoadPartial_key")
|
||||
value := []byte("TestMinIOKV_LoadPartial_value")
|
||||
|
||||
err = testCM.Write(ctx, key, value)
|
||||
assert.NoError(t, err)
|
||||
|
||||
var off, length int64
|
||||
var partial []byte
|
||||
|
||||
off, length = 1, 1
|
||||
partial, err = testCM.ReadAt(ctx, key, off, length)
|
||||
assert.NoError(t, err)
|
||||
assert.ElementsMatch(t, partial, value[off:off+length])
|
||||
|
||||
off, length = 0, int64(len(value))
|
||||
partial, err = testCM.ReadAt(ctx, key, off, length)
|
||||
assert.NoError(t, err)
|
||||
assert.ElementsMatch(t, partial, value[off:off+length])
|
||||
|
||||
// error case
|
||||
off, length = 5, -2
|
||||
_, err = testCM.ReadAt(ctx, key, off, length)
|
||||
assert.Error(t, err)
|
||||
|
||||
off, length = -1, 2
|
||||
_, err = testCM.ReadAt(ctx, key, off, length)
|
||||
assert.Error(t, err)
|
||||
|
||||
off, length = 1, -2
|
||||
_, err = testCM.ReadAt(ctx, key, off, length)
|
||||
assert.Error(t, err)
|
||||
|
||||
err = testCM.Remove(ctx, key)
|
||||
assert.NoError(t, err)
|
||||
off, length = 1, 1
|
||||
_, err = testCM.ReadAt(ctx, key, off, length)
|
||||
assert.Error(t, err)
|
||||
})
|
||||
|
||||
t.Run("test Size", func(t *testing.T) {
|
||||
testGetSizeRoot := path.Join(testMinIOKVRoot, "get_size")
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
defer cancel()
|
||||
|
||||
testCM, err := newMinIOChunkManager(ctx, testBucket, testGetSizeRoot)
|
||||
require.NoError(t, err)
|
||||
defer testCM.RemoveWithPrefix(ctx, testGetSizeRoot)
|
||||
|
||||
key := path.Join(testGetSizeRoot, "TestMinIOKV_GetSize_key")
|
||||
value := []byte("TestMinIOKV_GetSize_value")
|
||||
|
||||
err = testCM.Write(ctx, key, value)
|
||||
assert.NoError(t, err)
|
||||
|
||||
size, err := testCM.Size(ctx, key)
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, size, int64(len(value)))
|
||||
|
||||
key2 := path.Join(testGetSizeRoot, "TestMemoryKV_GetSize_key2")
|
||||
|
||||
size, err = testCM.Size(ctx, key2)
|
||||
assert.Error(t, err)
|
||||
assert.Equal(t, int64(0), size)
|
||||
})
|
||||
|
||||
t.Run("test Path", func(t *testing.T) {
|
||||
testGetPathRoot := path.Join(testMinIOKVRoot, "get_path")
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
defer cancel()
|
||||
|
||||
testCM, err := newMinIOChunkManager(ctx, testBucket, testGetPathRoot)
|
||||
require.NoError(t, err)
|
||||
defer testCM.RemoveWithPrefix(ctx, testGetPathRoot)
|
||||
|
||||
key := path.Join(testGetPathRoot, "TestMinIOKV_GetSize_key")
|
||||
value := []byte("TestMinIOKV_GetSize_value")
|
||||
|
||||
err = testCM.Write(ctx, key, value)
|
||||
assert.NoError(t, err)
|
||||
|
||||
p, err := testCM.Path(ctx, key)
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, p, key)
|
||||
|
||||
key2 := path.Join(testGetPathRoot, "TestMemoryKV_GetSize_key2")
|
||||
|
||||
p, err = testCM.Path(ctx, key2)
|
||||
assert.Error(t, err)
|
||||
assert.Equal(t, p, "")
|
||||
})
|
||||
|
||||
t.Run("test Mmap", func(t *testing.T) {
|
||||
testMmapRoot := path.Join(testMinIOKVRoot, "mmap")
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
defer cancel()
|
||||
|
||||
testCM, err := newMinIOChunkManager(ctx, testBucket, testMmapRoot)
|
||||
require.NoError(t, err)
|
||||
defer testCM.RemoveWithPrefix(ctx, testMmapRoot)
|
||||
|
||||
key := path.Join(testMmapRoot, "TestMinIOKV_GetSize_key")
|
||||
value := []byte("TestMinIOKV_GetSize_value")
|
||||
|
||||
err = testCM.Write(ctx, key, value)
|
||||
assert.NoError(t, err)
|
||||
|
||||
r, err := testCM.Mmap(ctx, key)
|
||||
assert.Error(t, err)
|
||||
assert.Nil(t, r)
|
||||
})
|
||||
|
||||
t.Run("test Prefix", func(t *testing.T) {
|
||||
testPrefix := path.Join(testMinIOKVRoot, "prefix")
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
defer cancel()
|
||||
|
||||
testCM, err := newMinIOChunkManager(ctx, testBucket, testPrefix)
|
||||
require.NoError(t, err)
|
||||
defer testCM.RemoveWithPrefix(ctx, testPrefix)
|
||||
|
||||
pathB := path.Join("a", "b")
|
||||
|
||||
key := path.Join(testPrefix, pathB)
|
||||
value := []byte("a")
|
||||
|
||||
err = testCM.Write(ctx, key, value)
|
||||
assert.NoError(t, err)
|
||||
|
||||
pathC := path.Join("a", "c")
|
||||
key = path.Join(testPrefix, pathC)
|
||||
err = testCM.Write(ctx, key, value)
|
||||
assert.NoError(t, err)
|
||||
|
||||
pathPrefix := path.Join(testPrefix, "a")
|
||||
r, m, err := testCM.ListWithPrefix(ctx, pathPrefix, true)
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, len(r), 2)
|
||||
assert.Equal(t, len(m), 2)
|
||||
|
||||
key = path.Join(testPrefix, "b", "b", "b")
|
||||
err = testCM.Write(ctx, key, value)
|
||||
assert.NoError(t, err)
|
||||
|
||||
key = path.Join(testPrefix, "b", "a", "b")
|
||||
err = testCM.Write(ctx, key, value)
|
||||
assert.NoError(t, err)
|
||||
|
||||
key = path.Join(testPrefix, "bc", "a", "b")
|
||||
err = testCM.Write(ctx, key, value)
|
||||
assert.NoError(t, err)
|
||||
dirs, mods, err := testCM.ListWithPrefix(ctx, testPrefix+"/", true)
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, 5, len(dirs))
|
||||
assert.Equal(t, 5, len(mods))
|
||||
|
||||
dirs, mods, err = testCM.ListWithPrefix(ctx, path.Join(testPrefix, "b"), true)
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, 3, len(dirs))
|
||||
assert.Equal(t, 3, len(mods))
|
||||
|
||||
testCM.RemoveWithPrefix(ctx, testPrefix)
|
||||
r, m, err = testCM.ListWithPrefix(ctx, pathPrefix, true)
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, 0, len(r))
|
||||
assert.Equal(t, 0, len(m))
|
||||
|
||||
// test wrong prefix
|
||||
b := make([]byte, 2048)
|
||||
pathWrong := path.Join(testPrefix, string(b))
|
||||
_, _, err = testCM.ListWithPrefix(ctx, pathWrong, true)
|
||||
assert.Error(t, err)
|
||||
})
|
||||
|
||||
t.Run("test NoSuchKey", func(t *testing.T) {
|
||||
testPrefix := path.Join(testMinIOKVRoot, "nokey")
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
defer cancel()
|
||||
|
||||
testCM, err := newMinIOChunkManager(ctx, testBucket, testPrefix)
|
||||
require.NoError(t, err)
|
||||
defer testCM.RemoveWithPrefix(ctx, testPrefix)
|
||||
|
||||
key := "a"
|
||||
|
||||
_, err = testCM.Read(ctx, key)
|
||||
assert.Error(t, err)
|
||||
assert.True(t, errors.Is(err, merr.ErrIoKeyNotFound))
|
||||
|
||||
_, err = testCM.ReadAt(ctx, key, 100, 1)
|
||||
assert.Error(t, err)
|
||||
assert.True(t, errors.Is(err, merr.ErrIoKeyNotFound))
|
||||
})
|
||||
}
|
||||
|
||||
func TestMinioChunkManager_normalizeRootPath(t *testing.T) {
|
||||
type testCase struct {
|
||||
input string
|
||||
expected string
|
||||
}
|
||||
|
||||
cases := []testCase{
|
||||
{
|
||||
input: "files",
|
||||
expected: "files",
|
||||
},
|
||||
{
|
||||
input: "files/",
|
||||
expected: "files/",
|
||||
},
|
||||
{
|
||||
input: "/files",
|
||||
expected: "files",
|
||||
},
|
||||
{
|
||||
input: "//files",
|
||||
expected: "files",
|
||||
},
|
||||
{
|
||||
input: "files/my-folder",
|
||||
expected: "files/my-folder",
|
||||
},
|
||||
{
|
||||
input: "",
|
||||
expected: "",
|
||||
},
|
||||
}
|
||||
|
||||
mcm := &MinioChunkManager{}
|
||||
for _, test := range cases {
|
||||
t.Run(test.input, func(t *testing.T) {
|
||||
assert.Equal(t, test.expected, mcm.normalizeRootPath(test.input))
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestMinioChunkManager_Read(t *testing.T) {
|
||||
var reader MockReader
|
||||
reader.offset = new(int)
|
||||
reader.value = make([]byte, 10)
|
||||
reader.lastEOF = true
|
||||
for i := 0; i < 10; i++ {
|
||||
reader.value[i] = byte(i)
|
||||
}
|
||||
value, err := Read(reader, 10)
|
||||
assert.Equal(t, len(value), 10)
|
||||
for i := 0; i < 10; i++ {
|
||||
assert.Equal(t, value[i], byte(i))
|
||||
}
|
||||
|
||||
assert.NoError(t, err)
|
||||
}
|
||||
|
||||
func TestMinioChunkManager_ReadEOF(t *testing.T) {
|
||||
var reader MockReader
|
||||
reader.offset = new(int)
|
||||
reader.value = make([]byte, 10)
|
||||
reader.lastEOF = false
|
||||
for i := 0; i < 10; i++ {
|
||||
reader.value[i] = byte(i)
|
||||
}
|
||||
value, err := Read(reader, 10)
|
||||
assert.Equal(t, len(value), 10)
|
||||
for i := 0; i < 10; i++ {
|
||||
assert.Equal(t, value[i], byte(i))
|
||||
}
|
||||
assert.NoError(t, err)
|
||||
}
|
||||
|
||||
type MockReader struct {
|
||||
value []byte
|
||||
offset *int
|
||||
lastEOF bool
|
||||
}
|
||||
|
||||
func (r MockReader) Read(p []byte) (n int, err error) {
|
||||
if len(r.value) == *r.offset {
|
||||
return 0, io.EOF
|
||||
}
|
||||
|
||||
cap := len(r.value) - *r.offset
|
||||
if cap < 5 {
|
||||
copy(p, r.value[*r.offset:])
|
||||
*r.offset = len(r.value)
|
||||
if r.lastEOF {
|
||||
return cap, io.EOF
|
||||
}
|
||||
return cap, nil
|
||||
}
|
||||
|
||||
n = rand.Intn(5)
|
||||
copy(p, r.value[*r.offset:(*r.offset+n)])
|
||||
*r.offset += n
|
||||
return n, nil
|
||||
}
|
@ -17,13 +17,11 @@
package storage

import (
"container/list"
"context"
"fmt"
"io"
"os"
"strings"
"time"

"github.com/minio/minio-go/v7"
"github.com/minio/minio-go/v7/pkg/credentials"
@ -33,9 +31,14 @@ import (
"github.com/milvus-io/milvus/internal/storage/gcp"
"github.com/milvus-io/milvus/internal/storage/tencent"
"github.com/milvus-io/milvus/pkg/log"
"github.com/milvus-io/milvus/pkg/util/paramtable"
"github.com/milvus-io/milvus/pkg/util/retry"
)

var CheckBucketRetryAttempts uint = 20

var _ ObjectStorage = (*MinioObjectStorage)(nil)

type MinioObjectStorage struct {
*minio.Client
}
@ -191,45 +194,29 @@ func (minioObjectStorage *MinioObjectStorage) StatObject(ctx context.Context, bu
return info.Size, checkObjectStorageError(objectName, err)
}

func (minioObjectStorage *MinioObjectStorage) ListObjects(ctx context.Context, bucketName string, prefix string, recursive bool) ([]string, []time.Time, error) {
var objectsKeys []string
var modTimes []time.Time
tasks := list.New()
tasks.PushBack(prefix)
for tasks.Len() > 0 {
e := tasks.Front()
pre := e.Value.(string)
tasks.Remove(e)

res := minioObjectStorage.Client.ListObjects(ctx, bucketName, minio.ListObjectsOptions{
Prefix: pre,
Recursive: false,
func (minioObjectStorage *MinioObjectStorage) WalkWithObjects(ctx context.Context, bucketName string, prefix string, recursive bool, walkFunc ChunkObjectWalkFunc) (err error) {
// If minio has lots of objects under the provided path,
// recursive = true may time out while recursively browsing the objects.
// See also: https://github.com/milvus-io/milvus/issues/19095
// So we can change `ListObjectsMaxKeys` to limit the max keys per batch and avoid the timeout.
in := minioObjectStorage.Client.ListObjects(ctx, bucketName, minio.ListObjectsOptions{
Prefix: prefix,
Recursive: recursive,
MaxKeys: paramtable.Get().MinioCfg.ListObjectsMaxKeys.GetAsInt(),
})

objects := map[string]time.Time{}
for object := range res {
for object := range in {
if object.Err != nil {
log.Warn("failed to list with prefix", zap.String("bucket", bucketName), zap.String("prefix", prefix), zap.Error(object.Err))
return []string{}, []time.Time{}, object.Err
return object.Err
}
objects[object.Key] = object.LastModified
}
for object, lastModified := range objects {
// with trailing "/", object is a "directory"
if strings.HasSuffix(object, "/") && recursive {
// enqueue when recursive is true
if object != pre {
tasks.PushBack(object)
}
continue
}
objectsKeys = append(objectsKeys, object)
modTimes = append(modTimes, lastModified)
if !walkFunc(&ChunkObjectInfo{FilePath: object.Key, ModifyTime: object.LastModified}) {
return nil
}
}
return objectsKeys, modTimes, nil
return nil
}

func (minioObjectStorage *MinioObjectStorage) RemoveObject(ctx context.Context, bucketName, objectName string) error {
return minioObjectStorage.Client.RemoveObject(ctx, bucketName, objectName, minio.RemoveObjectOptions{})
err := minioObjectStorage.Client.RemoveObject(ctx, bucketName, objectName, minio.RemoveObjectOptions{})
return checkObjectStorageError(objectName, err)
}
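The new WalkWithObjects contract is easiest to see from the caller's side: objects are streamed in batches of at most ListObjectsMaxKeys, and the callback can cut the walk short. A minimal sketch, not part of the patch (the countUpTo helper and its limit parameter are illustrative only):

package storage // illustrative placement alongside the types above

import "context"

// countUpTo walks at most `limit` objects under prefix and then stops.
// Returning false from the callback ends the walk with a nil error.
func countUpTo(ctx context.Context, store ObjectStorage, bucket, prefix string, limit int) (int, error) {
	count := 0
	err := store.WalkWithObjects(ctx, bucket, prefix, true, func(obj *ChunkObjectInfo) bool {
		count++
		return count < limit
	})
	return count, err
}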
@ -22,6 +22,7 @@ import (
|
||||
"fmt"
|
||||
"io"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/minio/minio-go/v7"
|
||||
"github.com/stretchr/testify/assert"
|
||||
@ -132,7 +133,7 @@ func TestMinioObjectStorage(t *testing.T) {
|
||||
|
||||
for _, test := range loadWithPrefixTests {
|
||||
t.Run(test.description, func(t *testing.T) {
|
||||
gotk, _, err := testCM.ListObjects(ctx, config.bucketName, test.prefix, false)
|
||||
gotk, _, err := listAllObjectsWithPrefixAtBucket(ctx, testCM, config.bucketName, test.prefix, false)
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, len(test.expectedValue), len(gotk))
|
||||
for _, key := range gotk {
|
||||
@ -166,7 +167,7 @@ func TestMinioObjectStorage(t *testing.T) {
|
||||
for _, test := range prepareTests {
|
||||
t.Run(test.key, func(t *testing.T) {
|
||||
err := testCM.PutObject(ctx, config.bucketName, test.key, bytes.NewReader(test.value), int64(len(test.value)))
|
||||
require.Equal(t, test.valid, err == nil)
|
||||
require.Equal(t, test.valid, err == nil, err)
|
||||
})
|
||||
}
|
||||
|
||||
@ -183,7 +184,7 @@ func TestMinioObjectStorage(t *testing.T) {
|
||||
|
||||
for _, test := range insertWithPrefixTests {
|
||||
t.Run(fmt.Sprintf("prefix: %s, recursive: %t", test.prefix, test.recursive), func(t *testing.T) {
|
||||
gotk, _, err := testCM.ListObjects(ctx, config.bucketName, test.prefix, test.recursive)
|
||||
gotk, _, err := listAllObjectsWithPrefixAtBucket(ctx, testCM, config.bucketName, test.prefix, test.recursive)
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, len(test.expectedValue), len(gotk))
|
||||
for _, key := range gotk {
|
||||
@ -226,3 +227,17 @@ func TestMinioObjectStorage(t *testing.T) {
|
||||
config.cloudProvider = cloudProvider
|
||||
})
|
||||
}
|
||||
|
||||
// listAllObjectsWithPrefixAtBucket is a helper function to list all objects with the same @prefix in a bucket by using `WalkWithObjects`.
|
||||
func listAllObjectsWithPrefixAtBucket(ctx context.Context, objectStorage ObjectStorage, bucket string, prefix string, recursive bool) ([]string, []time.Time, error) {
|
||||
var dirs []string
|
||||
var mods []time.Time
|
||||
if err := objectStorage.WalkWithObjects(ctx, bucket, prefix, recursive, func(chunkObjectInfo *ChunkObjectInfo) bool {
|
||||
dirs = append(dirs, chunkObjectInfo.FilePath)
|
||||
mods = append(mods, chunkObjectInfo.ModifyTime)
|
||||
return true
|
||||
}); err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
return dirs, mods, nil
|
||||
}
|
||||
|
@ -21,7 +21,6 @@ import (
"context"
"io"
"strings"
"time"

"github.com/Azure/azure-sdk-for-go/sdk/azcore"
"github.com/Azure/azure-sdk-for-go/sdk/storage/azblob/bloberror"
@ -46,11 +45,20 @@ const (
CloudProviderTencent = "tencent"
)

// ChunkObjectWalkFunc is the callback function for walking objects.
// If it returns false, WalkWithObjects will stop.
// Otherwise, WalkWithObjects will continue until it reaches the last object.
type ChunkObjectWalkFunc func(chunkObjectInfo *ChunkObjectInfo) bool

type ObjectStorage interface {
GetObject(ctx context.Context, bucketName, objectName string, offset int64, size int64) (FileReader, error)
PutObject(ctx context.Context, bucketName, objectName string, reader io.Reader, objectSize int64) error
StatObject(ctx context.Context, bucketName, objectName string) (int64, error)
ListObjects(ctx context.Context, bucketName string, prefix string, recursive bool) ([]string, []time.Time, error)
// WalkWithObjects walks all objects with prefix @prefix and calls the walker for each object.
// WalkWithObjects will stop when one of the following conditions is met:
// 1. the walker returns false or the last object is reached; WalkWithObjects stops and returns nil.
// 2. the underlying walk fails or the context is canceled; WalkWithObjects stops and returns an error.
WalkWithObjects(ctx context.Context, bucketName string, prefix string, recursive bool, walkFunc ChunkObjectWalkFunc) error
RemoveObject(ctx context.Context, bucketName, objectName string) error
}

@ -85,11 +93,26 @@ func NewRemoteChunkManager(ctx context.Context, c *config) (*RemoteChunkManager,
return mcm, nil
}

// NewRemoteChunkManagerForTesting is used for testing.
func NewRemoteChunkManagerForTesting(c *minio.Client, bucket string, rootPath string) *RemoteChunkManager {
mcm := &RemoteChunkManager{
client: &MinioObjectStorage{c},
bucketName: bucket,
rootPath: rootPath,
}
return mcm
}

// RootPath returns minio root path.
func (mcm *RemoteChunkManager) RootPath() string {
return mcm.rootPath
}

// UnderlyingObjectStorage returns the underlying object storage.
func (mcm *RemoteChunkManager) UnderlyingObjectStorage() ObjectStorage {
return mcm.client
}

// Path returns the path of minio data if exists.
func (mcm *RemoteChunkManager) Path(ctx context.Context, filePath string) (string, error) {
exist, err := mcm.Exist(ctx, filePath)
@ -184,7 +207,7 @@ func (mcm *RemoteChunkManager) Read(ctx context.Context, filePath string) ([]byt
log.Warn("failed to stat object", zap.String("bucket", mcm.bucketName), zap.String("path", filePath), zap.Error(err))
return err
}
data, err = Read(object, size)
data, err = read(object, size)
err = checkObjectStorageError(filePath, err)
if err != nil {
log.Warn("failed to read object", zap.String("bucket", mcm.bucketName), zap.String("path", filePath), zap.Error(err))
@ -214,19 +237,6 @@ func (mcm *RemoteChunkManager) MultiRead(ctx context.Context, keys []string) ([]
return objectsValues, el
}

func (mcm *RemoteChunkManager) ReadWithPrefix(ctx context.Context, prefix string) ([]string, [][]byte, error) {
objectsKeys, _, err := mcm.ListWithPrefix(ctx, prefix, true)
if err != nil {
return nil, nil, err
}
objectsValues, err := mcm.MultiRead(ctx, objectsKeys)
if err != nil {
return nil, nil, err
}

return objectsKeys, objectsValues, nil
}

func (mcm *RemoteChunkManager) Mmap(ctx context.Context, filePath string) (*mmap.ReaderAt, error) {
return nil, errors.New("this method has not been implemented")
}
@ -244,7 +254,7 @@ func (mcm *RemoteChunkManager) ReadAt(ctx context.Context, filePath string, off
}
defer object.Close()

data, err := Read(object, length)
data, err := read(object, length)
err = checkObjectStorageError(filePath, err)
if err != nil {
log.Warn("failed to read object", zap.String("bucket", mcm.bucketName), zap.String("path", filePath), zap.Error(err))
@ -278,49 +288,43 @@ func (mcm *RemoteChunkManager) MultiRemove(ctx context.Context, keys []string) e

// RemoveWithPrefix removes all objects with the same prefix @prefix from minio.
func (mcm *RemoteChunkManager) RemoveWithPrefix(ctx context.Context, prefix string) error {
removeKeys, _, err := mcm.listObjects(ctx, mcm.bucketName, prefix, true)
if err != nil {
return err
}
i := 0
maxGoroutine := 10
for i < len(removeKeys) {
runningGroup, groupCtx := errgroup.WithContext(ctx)
for j := 0; j < maxGoroutine && i < len(removeKeys); j++ {
key := removeKeys[i]
// removeObject in parallel.
runningGroup, _ := errgroup.WithContext(ctx)
runningGroup.SetLimit(10)
err := mcm.WalkWithPrefix(ctx, prefix, true, func(object *ChunkObjectInfo) bool {
key := object.FilePath
runningGroup.Go(func() error {
err := mcm.removeObject(groupCtx, mcm.bucketName, key)
err := mcm.removeObject(ctx, mcm.bucketName, key)
if err != nil {
log.Warn("failed to remove object", zap.String("path", key), zap.Error(err))
}
return err
}
return nil
})
i++
}
return true
})
// wait until all goroutines are done.
if err := runningGroup.Wait(); err != nil {
return err
}
// return the iteration error
return err
}

func (mcm *RemoteChunkManager) WalkWithPrefix(ctx context.Context, prefix string, recursive bool, walkFunc ChunkObjectWalkFunc) (err error) {
metrics.PersistentDataOpCounter.WithLabelValues(metrics.DataWalkLabel, metrics.TotalLabel).Inc()
logger := log.With(zap.String("prefix", prefix), zap.Bool("recursive", recursive))

logger.Info("start walk through objects")
if err := mcm.client.WalkWithObjects(ctx, mcm.bucketName, prefix, recursive, walkFunc); err != nil {
metrics.PersistentDataOpCounter.WithLabelValues(metrics.DataWalkLabel, metrics.FailLabel).Inc()
logger.Warn("failed to walk through objects", zap.Error(err))
return err
}
metrics.PersistentDataOpCounter.WithLabelValues(metrics.DataWalkLabel, metrics.SuccessLabel).Inc()
logger.Info("finish walk through objects")
return nil
}

// ListWithPrefix returns objects with provided prefix.
// By default, if `recursive`=false, it only returns objects whose path is at the same level as the prefix.
// Say minio has the following objects: [a, ab, a/b, ab/c]
// calling `ListWithPrefix` with `prefix` = a && `recursive` = false will only return [a, ab]
// If caller needs all objects without level limitation, `recursive` shall be true.
func (mcm *RemoteChunkManager) ListWithPrefix(ctx context.Context, prefix string, recursive bool) ([]string, []time.Time, error) {
// cannot use ListObjects(ctx, bucketName, Opt{Prefix:prefix, Recursive:true})
// if minio has lots of objects under the provided path
// recursive = true may time out while recursively browsing the objects.
// See also: https://github.com/milvus-io/milvus/issues/19095

// TODO add concurrent call if performance matters
// only return current level per call
return mcm.listObjects(ctx, mcm.bucketName, prefix, recursive)
}

func (mcm *RemoteChunkManager) getObject(ctx context.Context, bucketName, objectName string,
offset int64, size int64,
) (FileReader, error) {
@ -367,22 +371,6 @@ func (mcm *RemoteChunkManager) getObjectSize(ctx context.Context, bucketName, ob
return info, err
}

func (mcm *RemoteChunkManager) listObjects(ctx context.Context, bucketName string, prefix string, recursive bool) ([]string, []time.Time, error) {
start := timerecord.NewTimeRecorder("listObjects")

blobNames, lastModifiedTime, err := mcm.client.ListObjects(ctx, bucketName, prefix, recursive)
metrics.PersistentDataOpCounter.WithLabelValues(metrics.DataListLabel, metrics.TotalLabel).Inc()
if err == nil {
metrics.PersistentDataRequestLatency.WithLabelValues(metrics.DataListLabel).
Observe(float64(start.ElapseSpan().Milliseconds()))
metrics.PersistentDataOpCounter.WithLabelValues(metrics.DataListLabel, metrics.SuccessLabel).Inc()
} else {
log.Warn("failed to list with prefix", zap.String("bucket", mcm.bucketName), zap.String("prefix", prefix), zap.Error(err))
metrics.PersistentDataOpCounter.WithLabelValues(metrics.DataListLabel, metrics.FailLabel).Inc()
}
return blobNames, lastModifiedTime, err
}

func (mcm *RemoteChunkManager) removeObject(ctx context.Context, bucketName, objectName string) error {
start := timerecord.NewTimeRecorder("removeObject")

@ -421,3 +409,21 @@ func checkObjectStorageError(fileName string, err error) error {
}
return merr.WrapErrIoFailed(fileName, err)
}

// Learned from os.ReadFile
func read(r io.Reader, size int64) ([]byte, error) {
data := make([]byte, 0, size)
for {
n, err := r.Read(data[len(data):cap(data)])
data = data[:len(data)+n]
if err != nil {
if err == io.EOF {
err = nil
}
return data, err
}
if len(data) == cap(data) {
return data, nil
}
}
}
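The RemoveWithPrefix rewrite above is the core of the pipelined garbage collection: keys are no longer collected into one big slice first; each key is handed to a bounded worker pool while the walk is still in progress. A minimal, self-contained sketch of that pattern, not part of the patch (the removeAll helper and its walk/remove callbacks are illustrative):

package gcsketch

import (
	"context"

	"golang.org/x/sync/errgroup"
)

// removeAll streams keys from walk() and deletes them with at most 10
// concurrent remove() calls, without materializing the full key list.
func removeAll(ctx context.Context, walk func(func(key string) bool) error, remove func(ctx context.Context, key string) error) error {
	g, _ := errgroup.WithContext(ctx)
	g.SetLimit(10) // bound the number of in-flight deletions

	walkErr := walk(func(key string) bool {
		k := key
		g.Go(func() error { return remove(ctx, k) })
		return true // keep walking; returning false would stop the pipeline early
	})

	// wait for all scheduled deletions, then surface the walk error if any
	if err := g.Wait(); err != nil {
		return err
	}
	return walkErr
}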
@ -149,7 +149,7 @@ func TestMinioChunkManager(t *testing.T) {
|
||||
|
||||
for _, test := range loadWithPrefixTests {
|
||||
t.Run(test.description, func(t *testing.T) {
|
||||
gotk, gotv, err := testCM.ReadWithPrefix(ctx, path.Join(testLoadRoot, test.prefix))
|
||||
gotk, gotv, err := readAllChunkWithPrefix(ctx, testCM, path.Join(testLoadRoot, test.prefix))
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, len(test.expectedValue), len(gotk))
|
||||
assert.Equal(t, len(test.expectedValue), len(gotv))
|
||||
@ -455,7 +455,7 @@ func TestMinioChunkManager(t *testing.T) {
|
||||
assert.NoError(t, err)
|
||||
|
||||
pathPrefix := path.Join(testPrefix, "a")
|
||||
r, m, err := testCM.ListWithPrefix(ctx, pathPrefix, true)
|
||||
r, m, err := ListAllChunkWithPrefix(ctx, testCM, pathPrefix, true)
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, len(r), 2)
|
||||
assert.Equal(t, len(m), 2)
|
||||
@ -471,18 +471,18 @@ func TestMinioChunkManager(t *testing.T) {
|
||||
key = path.Join(testPrefix, "bc", "a", "b")
|
||||
err = testCM.Write(ctx, key, value)
|
||||
assert.NoError(t, err)
|
||||
dirs, mods, err := testCM.ListWithPrefix(ctx, testPrefix+"/", true)
|
||||
dirs, mods, err := ListAllChunkWithPrefix(ctx, testCM, testPrefix+"/", true)
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, 5, len(dirs))
|
||||
assert.Equal(t, 5, len(mods))
|
||||
|
||||
dirs, mods, err = testCM.ListWithPrefix(ctx, path.Join(testPrefix, "b"), true)
|
||||
dirs, mods, err = ListAllChunkWithPrefix(ctx, testCM, path.Join(testPrefix, "b"), true)
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, 3, len(dirs))
|
||||
assert.Equal(t, 3, len(mods))
|
||||
|
||||
testCM.RemoveWithPrefix(ctx, testPrefix)
|
||||
r, m, err = testCM.ListWithPrefix(ctx, pathPrefix, true)
|
||||
r, m, err = ListAllChunkWithPrefix(ctx, testCM, pathPrefix, true)
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, 0, len(r))
|
||||
assert.Equal(t, 0, len(m))
|
||||
@ -490,7 +490,7 @@ func TestMinioChunkManager(t *testing.T) {
|
||||
// test wrong prefix
|
||||
b := make([]byte, 2048)
|
||||
pathWrong := path.Join(testPrefix, string(b))
|
||||
_, _, err = testCM.ListWithPrefix(ctx, pathWrong, true)
|
||||
_, _, err = ListAllChunkWithPrefix(ctx, testCM, pathWrong, true)
|
||||
assert.Error(t, err)
|
||||
})
|
||||
|
||||
@ -603,7 +603,7 @@ func TestAzureChunkManager(t *testing.T) {
|
||||
|
||||
for _, test := range loadWithPrefixTests {
|
||||
t.Run(test.description, func(t *testing.T) {
|
||||
gotk, gotv, err := testCM.ReadWithPrefix(ctx, path.Join(testLoadRoot, test.prefix))
|
||||
gotk, gotv, err := readAllChunkWithPrefix(ctx, testCM, path.Join(testLoadRoot, test.prefix))
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, len(test.expectedValue), len(gotk))
|
||||
assert.Equal(t, len(test.expectedValue), len(gotv))
|
||||
@ -909,7 +909,7 @@ func TestAzureChunkManager(t *testing.T) {
|
||||
assert.NoError(t, err)
|
||||
|
||||
pathPrefix := path.Join(testPrefix, "a")
|
||||
r, m, err := testCM.ListWithPrefix(ctx, pathPrefix, true)
|
||||
r, m, err := ListAllChunkWithPrefix(ctx, testCM, pathPrefix, true)
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, len(r), 2)
|
||||
assert.Equal(t, len(m), 2)
|
||||
@ -925,18 +925,18 @@ func TestAzureChunkManager(t *testing.T) {
|
||||
key = path.Join(testPrefix, "bc", "a", "b")
|
||||
err = testCM.Write(ctx, key, value)
|
||||
assert.NoError(t, err)
|
||||
dirs, mods, err := testCM.ListWithPrefix(ctx, testPrefix+"/", true)
|
||||
dirs, mods, err := ListAllChunkWithPrefix(ctx, testCM, testPrefix+"/", true)
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, 5, len(dirs))
|
||||
assert.Equal(t, 5, len(mods))
|
||||
|
||||
dirs, mods, err = testCM.ListWithPrefix(ctx, path.Join(testPrefix, "b"), true)
|
||||
dirs, mods, err = ListAllChunkWithPrefix(ctx, testCM, path.Join(testPrefix, "b"), true)
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, 3, len(dirs))
|
||||
assert.Equal(t, 3, len(mods))
|
||||
|
||||
testCM.RemoveWithPrefix(ctx, testPrefix)
|
||||
r, m, err = testCM.ListWithPrefix(ctx, pathPrefix, true)
|
||||
r, m, err = ListAllChunkWithPrefix(ctx, testCM, pathPrefix, true)
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, 0, len(r))
|
||||
assert.Equal(t, 0, len(m))
|
||||
@ -944,7 +944,7 @@ func TestAzureChunkManager(t *testing.T) {
|
||||
// test wrong prefix
|
||||
b := make([]byte, 2048)
|
||||
pathWrong := path.Join(testPrefix, string(b))
|
||||
_, _, err = testCM.ListWithPrefix(ctx, pathWrong, true)
|
||||
_, _, err = ListAllChunkWithPrefix(ctx, testCM, pathWrong, true)
|
||||
assert.Error(t, err)
|
||||
})
|
||||
|
||||
|
@ -41,6 +41,12 @@ type FileReader interface {
io.Seeker
}

// ChunkObjectInfo stores object info.
type ChunkObjectInfo struct {
FilePath string
ModifyTime time.Time
}

// ChunkManager is to manage chunks.
// It includes Read, Write, and Remove of chunks.
type ChunkManager interface {
@ -62,9 +68,10 @@ type ChunkManager interface {
Reader(ctx context.Context, filePath string) (FileReader, error)
// MultiRead reads @filePaths and returns their contents.
MultiRead(ctx context.Context, filePaths []string) ([][]byte, error)
ListWithPrefix(ctx context.Context, prefix string, recursive bool) ([]string, []time.Time, error)
// ReadWithPrefix reads files with the same @prefix and returns their contents.
ReadWithPrefix(ctx context.Context, prefix string) ([]string, [][]byte, error)
// WalkWithPrefix lists files with the same @prefix and calls @walkFunc for each file.
// 1. walkFunc returns false or the last object is reached: WalkWithPrefix stops and returns nil.
// 2. the underlying walk fails or the context is canceled: WalkWithPrefix stops and returns an error.
WalkWithPrefix(ctx context.Context, prefix string, recursive bool, walkFunc ChunkObjectWalkFunc) error
Mmap(ctx context.Context, filePath string) (*mmap.ReaderAt, error)
// ReadAt reads @filePath by offset @off, content stored in @p, return @n as the number of bytes read.
// if all bytes are read, @err is io.EOF.
@ -77,3 +84,18 @@ type ChunkManager interface {
// RemoveWithPrefix removes files with the same @prefix.
RemoveWithPrefix(ctx context.Context, prefix string) error
}

// ListAllChunkWithPrefix is a helper function to list all objects with the same @prefix by using `WalkWithPrefix`.
// Calling `WalkWithPrefix` directly is more efficient if you don't need all chunks at the same time.
func ListAllChunkWithPrefix(ctx context.Context, manager ChunkManager, prefix string, recursive bool) ([]string, []time.Time, error) {
var dirs []string
var mods []time.Time
if err := manager.WalkWithPrefix(ctx, prefix, recursive, func(chunkInfo *ChunkObjectInfo) bool {
dirs = append(dirs, chunkInfo.FilePath)
mods = append(mods, chunkInfo.ModifyTime)
return true
}); err != nil {
return nil, nil, err
}
return dirs, mods, nil
}
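To make the trade-off in that comment concrete, here is a hedged sketch of the streaming call style against any ChunkManager implementation (hasAnyChunk is an illustrative helper, not part of the patch); ListAllChunkWithPrefix remains the drop-in replacement where old call sites really did need the full listing, as in the reader change below:

package storage // illustrative; ChunkManager and ChunkObjectInfo are the types above

import "context"

// hasAnyChunk reports whether at least one object exists under prefix.
// Unlike ListAllChunkWithPrefix it never accumulates keys: the walk stops
// as soon as the callback returns false.
func hasAnyChunk(ctx context.Context, cm ChunkManager, prefix string) (bool, error) {
	found := false
	err := cm.WalkWithPrefix(ctx, prefix, true, func(info *ChunkObjectInfo) bool {
		found = true
		return false // first hit is enough
	})
	return found, err
}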
@ -90,7 +90,7 @@ func (r *reader) init(paths []string, tsStart, tsEnd uint64) error {
if len(paths) < 2 {
return nil
}
deltaLogs, _, err := r.cm.ListWithPrefix(context.Background(), paths[1], true)
deltaLogs, _, err := storage.ListAllChunkWithPrefix(context.Background(), r.cm, paths[1], true)
if err != nil {
return err
}
@ -24,6 +24,7 @@ import (
|
||||
"math/rand"
|
||||
"strconv"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/samber/lo"
|
||||
"github.com/stretchr/testify/assert"
|
||||
@ -345,8 +346,24 @@ func (suite *ReaderSuite) run(dt schemapb.DataType) {
|
||||
originalInsertData := createInsertData(suite.T(), schema, suite.numRows)
|
||||
insertLogs := lo.Flatten(lo.Values(insertBinlogs))
|
||||
|
||||
cm.EXPECT().ListWithPrefix(mock.Anything, insertPrefix, mock.Anything).Return(insertLogs, nil, nil)
|
||||
cm.EXPECT().ListWithPrefix(mock.Anything, deltaPrefix, mock.Anything).Return(deltaLogs, nil, nil)
|
||||
cm.EXPECT().WalkWithPrefix(mock.Anything, insertPrefix, mock.Anything, mock.Anything).RunAndReturn(
|
||||
func(ctx context.Context, s string, b bool, cowf storage.ChunkObjectWalkFunc) error {
|
||||
for _, filePath := range insertLogs {
|
||||
if !cowf(&storage.ChunkObjectInfo{FilePath: filePath, ModifyTime: time.Now()}) {
|
||||
return nil
|
||||
}
|
||||
}
|
||||
return nil
|
||||
})
|
||||
cm.EXPECT().WalkWithPrefix(mock.Anything, deltaPrefix, mock.Anything, mock.Anything).RunAndReturn(
|
||||
func(ctx context.Context, s string, b bool, cowf storage.ChunkObjectWalkFunc) error {
|
||||
for _, filePath := range deltaLogs {
|
||||
if !cowf(&storage.ChunkObjectInfo{FilePath: filePath, ModifyTime: time.Now()}) {
|
||||
return nil
|
||||
}
|
||||
}
|
||||
return nil
|
||||
})
|
||||
for fieldID, paths := range insertBinlogs {
|
||||
field := typeutil.GetField(schema, fieldID)
|
||||
suite.NotNil(field)
|
||||
|
@ -66,20 +66,25 @@ func newBinlogReader(ctx context.Context, cm storage.ChunkManager, path string)
}

func listInsertLogs(ctx context.Context, cm storage.ChunkManager, insertPrefix string) (map[int64][]string, error) {
insertLogPaths, _, err := cm.ListWithPrefix(ctx, insertPrefix, true)
if err != nil {
return nil, err
}
insertLogs := make(map[int64][]string)
for _, logPath := range insertLogPaths {
fieldPath := path.Dir(logPath)
var walkErr error
if err := cm.WalkWithPrefix(ctx, insertPrefix, true, func(insertLog *storage.ChunkObjectInfo) bool {
fieldPath := path.Dir(insertLog.FilePath)
fieldStrID := path.Base(fieldPath)
fieldID, err := strconv.ParseInt(fieldStrID, 10, 64)
if err != nil {
return nil, merr.WrapErrImportFailed(fmt.Sprintf("failed to parse field id from log, error: %v", err))
walkErr = merr.WrapErrImportFailed(fmt.Sprintf("failed to parse field id from log, error: %v", err))
return false
}
insertLogs[fieldID] = append(insertLogs[fieldID], logPath)
insertLogs[fieldID] = append(insertLogs[fieldID], insertLog.FilePath)
return true
}); err != nil {
return nil, err
}
if walkErr != nil {
return nil, walkErr
}

for _, v := range insertLogs {
sort.Strings(v)
}
@ -192,13 +192,13 @@ var (

/* garbage collector related metrics */

// GarbageCollectorListLatency metrics for gc scan storage files.
GarbageCollectorListLatency = prometheus.NewHistogramVec(
// GarbageCollectorFileScanDuration metrics for gc scan storage files.
GarbageCollectorFileScanDuration = prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Namespace: milvusNamespace,
Subsystem: typeutil.DataCoordRole,
Name: "gc_list_latency",
Help: "latency of list objects in storage while garbage collecting (in milliseconds)",
Name: "gc_file_scan_duration",
Help: "duration of scan file in storage while garbage collecting (in milliseconds)",
Buckets: longTaskBuckets,
}, []string{nodeIDLabelName, segmentFileTypeLabelName})

@ -305,6 +305,8 @@ func RegisterDataCoord(registry *prometheus.Registry) {
registry.MustRegister(IndexTaskNum)
registry.MustRegister(IndexNodeNum)
registry.MustRegister(ImportTasks)
registry.MustRegister(GarbageCollectorFileScanDuration)
registry.MustRegister(GarbageCollectorRunCount)
}

func CleanupDataCoordSegmentMetrics(collectionID int64, segmentID int64) {
@ -22,7 +22,7 @@ const (
DataGetLabel = "get"
DataPutLabel = "put"
DataRemoveLabel = "remove"
DataListLabel = "list"
DataWalkLabel = "walk"
DataStatLabel = "stat"

persistentDataOpType = "persistent_data_op_type"
@ -112,3 +112,15 @@ func AwaitAll[T future](futures ...T) error {

return nil
}

// BlockOnAll blocks until all futures complete.
// It returns the first error among these futures.
func BlockOnAll[T future](futures ...T) error {
var err error
for i := range futures {
if e := futures[i].Err(); e != nil && err == nil {
err = e
}
}
return err
}
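The difference between the two helpers matters for the GC pipeline: AwaitAll gives up as soon as it observes the first error, while BlockOnAll drains every future before reporting one (the tests below exercise exactly this). A hedged sketch of calling them from outside the package; the conc import path and the example function are assumptions based only on the signatures shown in this diff:

package concsketch

import (
	"github.com/cockroachdb/errors"

	"github.com/milvus-io/milvus/pkg/util/conc" // assumed home of Go, AwaitAll, BlockOnAll
)

func example() error {
	futures := make([]*conc.Future[struct{}], 0, 3)
	for i := 0; i < 3; i++ {
		i := i
		futures = append(futures, conc.Go(func() (struct{}, error) {
			if i == 1 {
				return struct{}{}, errors.New("boom")
			}
			return struct{}{}, nil
		}))
	}
	// AwaitAll returns once the first error is seen;
	// BlockOnAll waits for every future, then reports the first error.
	_ = conc.AwaitAll(futures...)
	return conc.BlockOnAll(futures...)
}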
@ -22,6 +22,7 @@ import (
|
||||
|
||||
"github.com/cockroachdb/errors"
|
||||
"github.com/stretchr/testify/suite"
|
||||
"go.uber.org/atomic"
|
||||
)
|
||||
|
||||
type FutureSuite struct {
|
||||
@ -46,6 +47,54 @@ func (s *FutureSuite) TestFuture() {
|
||||
s.Equal(10, resultFuture.Value())
|
||||
}
|
||||
|
||||
func (s *FutureSuite) TestBlockOnAll() {
|
||||
cnt := atomic.NewInt32(0)
|
||||
futures := make([]*Future[struct{}], 10)
|
||||
for i := 0; i < 10; i++ {
|
||||
sleepTime := time.Duration(i) * 100 * time.Millisecond
|
||||
futures[i] = Go(func() (struct{}, error) {
|
||||
time.Sleep(sleepTime)
|
||||
cnt.Add(1)
|
||||
return struct{}{}, errors.New("errFuture")
|
||||
})
|
||||
}
|
||||
|
||||
err := BlockOnAll(futures...)
|
||||
s.Error(err)
|
||||
s.Equal(int32(10), cnt.Load())
|
||||
|
||||
cnt.Store(0)
|
||||
for i := 0; i < 10; i++ {
|
||||
sleepTime := time.Duration(i) * 100 * time.Millisecond
|
||||
futures[i] = Go(func() (struct{}, error) {
|
||||
time.Sleep(sleepTime)
|
||||
cnt.Add(1)
|
||||
return struct{}{}, nil
|
||||
})
|
||||
}
|
||||
|
||||
err = BlockOnAll(futures...)
|
||||
s.NoError(err)
|
||||
s.Equal(int32(10), cnt.Load())
|
||||
}
|
||||
|
||||
func (s *FutureSuite) TestAwaitAll() {
|
||||
cnt := atomic.NewInt32(0)
|
||||
futures := make([]*Future[struct{}], 10)
|
||||
for i := 0; i < 10; i++ {
|
||||
sleepTime := time.Duration(i) * 100 * time.Millisecond
|
||||
futures[i] = Go(func() (struct{}, error) {
|
||||
time.Sleep(sleepTime)
|
||||
cnt.Add(1)
|
||||
return struct{}{}, errors.New("errFuture")
|
||||
})
|
||||
}
|
||||
|
||||
err := AwaitAll(futures...)
|
||||
s.Error(err)
|
||||
s.Equal(int32(1), cnt.Load())
|
||||
}
|
||||
|
||||
func TestFuture(t *testing.T) {
|
||||
suite.Run(t, new(FutureSuite))
|
||||
}
|
||||
|
@ -2994,8 +2994,8 @@ During compaction, the size of segment # of rows is able to exceed segment max #
p.GCMissingTolerance = ParamItem{
Key: "dataCoord.gc.missingTolerance",
Version: "2.0.0",
DefaultValue: "3600",
Doc: "file meta missing tolerance duration in seconds, default to 1hr",
DefaultValue: "86400",
Doc: "file meta missing tolerance duration in seconds, default to 24hr(1d)",
Export: true,
}
p.GCMissingTolerance.Init(base.mgr)
@ -1047,6 +1047,7 @@ type MinioConfig struct {
Region ParamItem `refreshable:"false"`
UseVirtualHost ParamItem `refreshable:"false"`
RequestTimeoutMs ParamItem `refreshable:"false"`
ListObjectsMaxKeys ParamItem `refreshable:"true"`
}

func (p *MinioConfig) Init(base *BaseTable) {
@ -1214,4 +1215,14 @@ Leave it empty if you want to use AWS default endpoint`,
Export: true,
}
p.RequestTimeoutMs.Init(base.mgr)

p.ListObjectsMaxKeys = ParamItem{
Key: "minio.listObjectsMaxKeys",
Version: "2.4.1",
DefaultValue: "0",
Doc: `The maximum number of objects requested per batch in minio ListObjects rpc,
0 means using oss client by default, decrease this configuration if ListObjects times out`,
Export: true,
}
p.ListObjectsMaxKeys.Init(base.mgr)
}