Add metrics for garbage collection (#27303)

Also fix the compaction latency metric, which was reported in seconds, to be reported in milliseconds

Signed-off-by: Congqi Xia <congqi.xia@zilliz.com>
This commit is contained in:
congqixia 2023-09-22 18:47:25 +08:00 committed by GitHub
parent 8f4aaa2da8
commit 1d76565894
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 36 additions and 4 deletions

View File

@ -18,6 +18,7 @@ package datacoord
import ( import (
"context" "context"
"fmt"
"path" "path"
"sort" "sort"
"strings" "strings"
@ -33,7 +34,9 @@ import (
"github.com/milvus-io/milvus/internal/storage" "github.com/milvus-io/milvus/internal/storage"
"github.com/milvus-io/milvus/pkg/common" "github.com/milvus-io/milvus/pkg/common"
"github.com/milvus-io/milvus/pkg/log" "github.com/milvus-io/milvus/pkg/log"
"github.com/milvus-io/milvus/pkg/metrics"
"github.com/milvus-io/milvus/pkg/util/metautil" "github.com/milvus-io/milvus/pkg/util/metautil"
"github.com/milvus-io/milvus/pkg/util/paramtable"
"github.com/milvus-io/milvus/pkg/util/typeutil" "github.com/milvus-io/milvus/pkg/util/typeutil"
) )
@ -141,9 +144,10 @@ func (gc *garbageCollector) scan() {
prefixes = append(prefixes, path.Join(gc.option.cli.RootPath(), common.SegmentInsertLogPath)) prefixes = append(prefixes, path.Join(gc.option.cli.RootPath(), common.SegmentInsertLogPath))
prefixes = append(prefixes, path.Join(gc.option.cli.RootPath(), common.SegmentStatslogPath)) prefixes = append(prefixes, path.Join(gc.option.cli.RootPath(), common.SegmentStatslogPath))
prefixes = append(prefixes, path.Join(gc.option.cli.RootPath(), common.SegmentDeltaLogPath)) prefixes = append(prefixes, path.Join(gc.option.cli.RootPath(), common.SegmentDeltaLogPath))
labels := []string{metrics.InsertFileLabel, metrics.StatFileLabel, metrics.DeleteFileLabel}
var removedKeys []string var removedKeys []string
for _, prefix := range prefixes { for idx, prefix := range prefixes {
startTs := time.Now() startTs := time.Now()
infoKeys, modTimes, err := gc.option.cli.ListWithPrefix(ctx, prefix, true) infoKeys, modTimes, err := gc.option.cli.ListWithPrefix(ctx, prefix, true)
if err != nil { if err != nil {
@ -152,8 +156,12 @@ func (gc *garbageCollector) scan() {
zap.Error(err), zap.Error(err),
) )
} }
cost := time.Since(startTs)
segmentMap, filesMap := getMetaMap() segmentMap, filesMap := getMetaMap()
log.Info("gc scan finish list object", zap.String("prefix", prefix), zap.Duration("time spent", time.Since(startTs)), zap.Int("keys", len(infoKeys))) metrics.GarbageCollectorListLatency.
WithLabelValues(fmt.Sprint(paramtable.GetNodeID()), labels[idx]).
Observe(float64(cost.Milliseconds()))
log.Info("gc scan finish list object", zap.String("prefix", prefix), zap.Duration("time spent", cost), zap.Int("keys", len(infoKeys)))
for i, infoKey := range infoKeys { for i, infoKey := range infoKeys {
total++ total++
_, has := filesMap[infoKey] _, has := filesMap[infoKey]
@ -191,6 +199,7 @@ func (gc *garbageCollector) scan() {
} }
} }
} }
metrics.GarbageCollectorRunCount.WithLabelValues(fmt.Sprint(paramtable.GetNodeID())).Add(1)
log.Info("scan file to do garbage collection", log.Info("scan file to do garbage collection",
zap.Int("total", total), zap.Int("total", total),
zap.Int("valid", valid), zap.Int("valid", valid),

View File

@ -712,7 +712,7 @@ func (t *compactionTask) compact() (*datapb.CompactionResult, error) {
) )
log.Info("compact overall elapse", zap.Duration("elapse", time.Since(compactStart))) log.Info("compact overall elapse", zap.Duration("elapse", time.Since(compactStart)))
metrics.DataNodeCompactionLatency.WithLabelValues(fmt.Sprint(paramtable.GetNodeID())).Observe(t.tr.ElapseSpan().Seconds()) metrics.DataNodeCompactionLatency.WithLabelValues(fmt.Sprint(paramtable.GetNodeID())).Observe(float64(t.tr.ElapseSpan().Milliseconds()))
metrics.DataNodeCompactionLatencyInQueue.WithLabelValues(fmt.Sprint(paramtable.GetNodeID())).Observe(float64(durInQueue.Milliseconds())) metrics.DataNodeCompactionLatencyInQueue.WithLabelValues(fmt.Sprint(paramtable.GetNodeID())).Observe(float64(durInQueue.Milliseconds()))
return pack, nil return pack, nil

View File

@ -149,6 +149,26 @@ var (
Buckets: buckets, Buckets: buckets,
}, []string{segmentFileTypeLabelName}) }, []string{segmentFileTypeLabelName})
/* garbage collector related metrics */
// GarbageCollectorListLatency records the latency, in milliseconds, of listing storage objects during a gc scan.
GarbageCollectorListLatency = prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Namespace: milvusNamespace,
Subsystem: typeutil.DataCoordRole,
Name: "gc_list_latency",
Help: "latency of list objects in storage while garbage collecting (in milliseconds)",
Buckets: longTaskBuckets,
}, []string{nodeIDLabelName, segmentFileTypeLabelName})
GarbageCollectorRunCount = prometheus.NewCounterVec(
prometheus.CounterOpts{
Namespace: milvusNamespace,
Subsystem: typeutil.DataCoordRole,
Name: "gc_run_count",
Help: "garbage collection running count",
}, []string{nodeIDLabelName})
/* hard to implement, commented now /* hard to implement, commented now
DataCoordSegmentSizeRatio = prometheus.NewHistogramVec( DataCoordSegmentSizeRatio = prometheus.NewHistogramVec(
prometheus.HistogramOpts{ prometheus.HistogramOpts{

View File

@ -154,7 +154,7 @@ var (
Subsystem: typeutil.DataNodeRole, Subsystem: typeutil.DataNodeRole,
Name: "compaction_latency", Name: "compaction_latency",
Help: "latency of compaction operation", Help: "latency of compaction operation",
Buckets: []float64{0.001, 0.1, 0.5, 1, 5, 10, 20, 50, 100, 250, 500, 1000, 3600, 5000, 10000}, // unit seconds Buckets: longTaskBuckets,
}, []string{ }, []string{
nodeIDLabelName, nodeIDLabelName,
}) })

View File

@ -95,6 +95,9 @@ var (
// [1 2 4 8 16 32 64 128 256 512 1024 2048 4096 8192 16384 32768 65536 1.31072e+05] // [1 2 4 8 16 32 64 128 256 512 1024 2048 4096 8192 16384 32768 65536 1.31072e+05]
buckets = prometheus.ExponentialBuckets(1, 2, 18) buckets = prometheus.ExponentialBuckets(1, 2, 18)
// longTaskBuckets defines histogram bucket boundaries, in milliseconds, for long-running task durations.
longTaskBuckets = []float64{1, 100, 500, 1000, 5000, 10000, 20000, 50000, 100000, 250000, 500000, 1000000, 3600000, 5000000, 10000000} // unit milliseconds
NumNodes = prometheus.NewGaugeVec( NumNodes = prometheus.NewGaugeVec(
prometheus.GaugeOpts{ prometheus.GaugeOpts{
Namespace: milvusNamespace, Namespace: milvusNamespace,