mirror of
https://gitee.com/milvus-io/milvus.git
synced 2024-12-02 03:48:37 +08:00
Add metrics for garbage collection (#27303)
Also fix the compaction latency metric to report milliseconds instead of seconds.
Signed-off-by: Congqi Xia <congqi.xia@zilliz.com>
This commit is contained in:
parent
8f4aaa2da8
commit
1d76565894
@ -18,6 +18,7 @@ package datacoord
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"path"
|
||||
"sort"
|
||||
"strings"
|
||||
@ -33,7 +34,9 @@ import (
|
||||
"github.com/milvus-io/milvus/internal/storage"
|
||||
"github.com/milvus-io/milvus/pkg/common"
|
||||
"github.com/milvus-io/milvus/pkg/log"
|
||||
"github.com/milvus-io/milvus/pkg/metrics"
|
||||
"github.com/milvus-io/milvus/pkg/util/metautil"
|
||||
"github.com/milvus-io/milvus/pkg/util/paramtable"
|
||||
"github.com/milvus-io/milvus/pkg/util/typeutil"
|
||||
)
|
||||
|
||||
@ -141,9 +144,10 @@ func (gc *garbageCollector) scan() {
|
||||
prefixes = append(prefixes, path.Join(gc.option.cli.RootPath(), common.SegmentInsertLogPath))
|
||||
prefixes = append(prefixes, path.Join(gc.option.cli.RootPath(), common.SegmentStatslogPath))
|
||||
prefixes = append(prefixes, path.Join(gc.option.cli.RootPath(), common.SegmentDeltaLogPath))
|
||||
labels := []string{metrics.InsertFileLabel, metrics.StatFileLabel, metrics.DeleteFileLabel}
|
||||
var removedKeys []string
|
||||
|
||||
for _, prefix := range prefixes {
|
||||
for idx, prefix := range prefixes {
|
||||
startTs := time.Now()
|
||||
infoKeys, modTimes, err := gc.option.cli.ListWithPrefix(ctx, prefix, true)
|
||||
if err != nil {
|
||||
@ -152,8 +156,12 @@ func (gc *garbageCollector) scan() {
|
||||
zap.Error(err),
|
||||
)
|
||||
}
|
||||
cost := time.Since(startTs)
|
||||
segmentMap, filesMap := getMetaMap()
|
||||
log.Info("gc scan finish list object", zap.String("prefix", prefix), zap.Duration("time spent", time.Since(startTs)), zap.Int("keys", len(infoKeys)))
|
||||
metrics.GarbageCollectorListLatency.
|
||||
WithLabelValues(fmt.Sprint(paramtable.GetNodeID()), labels[idx]).
|
||||
Observe(float64(cost.Milliseconds()))
|
||||
log.Info("gc scan finish list object", zap.String("prefix", prefix), zap.Duration("time spent", cost), zap.Int("keys", len(infoKeys)))
|
||||
for i, infoKey := range infoKeys {
|
||||
total++
|
||||
_, has := filesMap[infoKey]
|
||||
@ -191,6 +199,7 @@ func (gc *garbageCollector) scan() {
|
||||
}
|
||||
}
|
||||
}
|
||||
metrics.GarbageCollectorRunCount.WithLabelValues(fmt.Sprint(paramtable.GetNodeID())).Add(1)
|
||||
log.Info("scan file to do garbage collection",
|
||||
zap.Int("total", total),
|
||||
zap.Int("valid", valid),
|
||||
|
@ -712,7 +712,7 @@ func (t *compactionTask) compact() (*datapb.CompactionResult, error) {
|
||||
)
|
||||
|
||||
log.Info("compact overall elapse", zap.Duration("elapse", time.Since(compactStart)))
|
||||
metrics.DataNodeCompactionLatency.WithLabelValues(fmt.Sprint(paramtable.GetNodeID())).Observe(t.tr.ElapseSpan().Seconds())
|
||||
metrics.DataNodeCompactionLatency.WithLabelValues(fmt.Sprint(paramtable.GetNodeID())).Observe(float64(t.tr.ElapseSpan().Milliseconds()))
|
||||
metrics.DataNodeCompactionLatencyInQueue.WithLabelValues(fmt.Sprint(paramtable.GetNodeID())).Observe(float64(durInQueue.Milliseconds()))
|
||||
|
||||
return pack, nil
|
||||
|
@ -149,6 +149,26 @@ var (
|
||||
Buckets: buckets,
|
||||
}, []string{segmentFileTypeLabelName})
|
||||
|
||||
/* garbage collector related metrics */
|
||||
|
||||
// GarbageCollectorListLatency records the latency, in milliseconds, of listing storage objects during a garbage collection scan.
|
||||
GarbageCollectorListLatency = prometheus.NewHistogramVec(
|
||||
prometheus.HistogramOpts{
|
||||
Namespace: milvusNamespace,
|
||||
Subsystem: typeutil.DataCoordRole,
|
||||
Name: "gc_list_latency",
|
||||
Help: "latency of list objects in storage while garbage collecting (in milliseconds)",
|
||||
Buckets: longTaskBuckets,
|
||||
}, []string{nodeIDLabelName, segmentFileTypeLabelName})
|
||||
|
||||
GarbageCollectorRunCount = prometheus.NewCounterVec(
|
||||
prometheus.CounterOpts{
|
||||
Namespace: milvusNamespace,
|
||||
Subsystem: typeutil.DataCoordRole,
|
||||
Name: "gc_run_count",
|
||||
Help: "garbage collection running count",
|
||||
}, []string{nodeIDLabelName})
|
||||
|
||||
/* hard to implement, commented now
|
||||
DataCoordSegmentSizeRatio = prometheus.NewHistogramVec(
|
||||
prometheus.HistogramOpts{
|
||||
|
@ -154,7 +154,7 @@ var (
|
||||
Subsystem: typeutil.DataNodeRole,
|
||||
Name: "compaction_latency",
|
||||
Help: "latency of compaction operation",
|
||||
Buckets: []float64{0.001, 0.1, 0.5, 1, 5, 10, 20, 50, 100, 250, 500, 1000, 3600, 5000, 10000}, // unit seconds
|
||||
Buckets: longTaskBuckets,
|
||||
}, []string{
|
||||
nodeIDLabelName,
|
||||
})
|
||||
|
@ -95,6 +95,9 @@ var (
|
||||
// [1 2 4 8 16 32 64 128 256 512 1024 2048 4096 8192 16384 32768 65536 1.31072e+05]
|
||||
buckets = prometheus.ExponentialBuckets(1, 2, 18)
|
||||
|
||||
// longTaskBuckets provides histogram bucket boundaries for long-running task durations, in milliseconds.
|
||||
longTaskBuckets = []float64{1, 100, 500, 1000, 5000, 10000, 20000, 50000, 100000, 250000, 500000, 1000000, 3600000, 5000000, 10000000} // unit milliseconds
|
||||
|
||||
NumNodes = prometheus.NewGaugeVec(
|
||||
prometheus.GaugeOpts{
|
||||
Namespace: milvusNamespace,
|
||||
|
Loading…
Reference in New Issue
Block a user