mirror of
https://gitee.com/milvus-io/milvus.git
synced 2024-12-02 03:48:37 +08:00
Add metrics for garbage collection (#27303)
Also switch the compaction latency metric from seconds to milliseconds.
Signed-off-by: Congqi Xia <congqi.xia@zilliz.com>
This commit is contained in:
parent
8f4aaa2da8
commit
1d76565894
@ -18,6 +18,7 @@ package datacoord
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
|
"fmt"
|
||||||
"path"
|
"path"
|
||||||
"sort"
|
"sort"
|
||||||
"strings"
|
"strings"
|
||||||
@ -33,7 +34,9 @@ import (
|
|||||||
"github.com/milvus-io/milvus/internal/storage"
|
"github.com/milvus-io/milvus/internal/storage"
|
||||||
"github.com/milvus-io/milvus/pkg/common"
|
"github.com/milvus-io/milvus/pkg/common"
|
||||||
"github.com/milvus-io/milvus/pkg/log"
|
"github.com/milvus-io/milvus/pkg/log"
|
||||||
|
"github.com/milvus-io/milvus/pkg/metrics"
|
||||||
"github.com/milvus-io/milvus/pkg/util/metautil"
|
"github.com/milvus-io/milvus/pkg/util/metautil"
|
||||||
|
"github.com/milvus-io/milvus/pkg/util/paramtable"
|
||||||
"github.com/milvus-io/milvus/pkg/util/typeutil"
|
"github.com/milvus-io/milvus/pkg/util/typeutil"
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -141,9 +144,10 @@ func (gc *garbageCollector) scan() {
|
|||||||
prefixes = append(prefixes, path.Join(gc.option.cli.RootPath(), common.SegmentInsertLogPath))
|
prefixes = append(prefixes, path.Join(gc.option.cli.RootPath(), common.SegmentInsertLogPath))
|
||||||
prefixes = append(prefixes, path.Join(gc.option.cli.RootPath(), common.SegmentStatslogPath))
|
prefixes = append(prefixes, path.Join(gc.option.cli.RootPath(), common.SegmentStatslogPath))
|
||||||
prefixes = append(prefixes, path.Join(gc.option.cli.RootPath(), common.SegmentDeltaLogPath))
|
prefixes = append(prefixes, path.Join(gc.option.cli.RootPath(), common.SegmentDeltaLogPath))
|
||||||
|
labels := []string{metrics.InsertFileLabel, metrics.StatFileLabel, metrics.DeleteFileLabel}
|
||||||
var removedKeys []string
|
var removedKeys []string
|
||||||
|
|
||||||
for _, prefix := range prefixes {
|
for idx, prefix := range prefixes {
|
||||||
startTs := time.Now()
|
startTs := time.Now()
|
||||||
infoKeys, modTimes, err := gc.option.cli.ListWithPrefix(ctx, prefix, true)
|
infoKeys, modTimes, err := gc.option.cli.ListWithPrefix(ctx, prefix, true)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@ -152,8 +156,12 @@ func (gc *garbageCollector) scan() {
|
|||||||
zap.Error(err),
|
zap.Error(err),
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
cost := time.Since(startTs)
|
||||||
segmentMap, filesMap := getMetaMap()
|
segmentMap, filesMap := getMetaMap()
|
||||||
log.Info("gc scan finish list object", zap.String("prefix", prefix), zap.Duration("time spent", time.Since(startTs)), zap.Int("keys", len(infoKeys)))
|
metrics.GarbageCollectorListLatency.
|
||||||
|
WithLabelValues(fmt.Sprint(paramtable.GetNodeID()), labels[idx]).
|
||||||
|
Observe(float64(cost.Milliseconds()))
|
||||||
|
log.Info("gc scan finish list object", zap.String("prefix", prefix), zap.Duration("time spent", cost), zap.Int("keys", len(infoKeys)))
|
||||||
for i, infoKey := range infoKeys {
|
for i, infoKey := range infoKeys {
|
||||||
total++
|
total++
|
||||||
_, has := filesMap[infoKey]
|
_, has := filesMap[infoKey]
|
||||||
@ -191,6 +199,7 @@ func (gc *garbageCollector) scan() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
metrics.GarbageCollectorRunCount.WithLabelValues(fmt.Sprint(paramtable.GetNodeID())).Add(1)
|
||||||
log.Info("scan file to do garbage collection",
|
log.Info("scan file to do garbage collection",
|
||||||
zap.Int("total", total),
|
zap.Int("total", total),
|
||||||
zap.Int("valid", valid),
|
zap.Int("valid", valid),
|
||||||
|
@ -712,7 +712,7 @@ func (t *compactionTask) compact() (*datapb.CompactionResult, error) {
|
|||||||
)
|
)
|
||||||
|
|
||||||
log.Info("compact overall elapse", zap.Duration("elapse", time.Since(compactStart)))
|
log.Info("compact overall elapse", zap.Duration("elapse", time.Since(compactStart)))
|
||||||
metrics.DataNodeCompactionLatency.WithLabelValues(fmt.Sprint(paramtable.GetNodeID())).Observe(t.tr.ElapseSpan().Seconds())
|
metrics.DataNodeCompactionLatency.WithLabelValues(fmt.Sprint(paramtable.GetNodeID())).Observe(float64(t.tr.ElapseSpan().Milliseconds()))
|
||||||
metrics.DataNodeCompactionLatencyInQueue.WithLabelValues(fmt.Sprint(paramtable.GetNodeID())).Observe(float64(durInQueue.Milliseconds()))
|
metrics.DataNodeCompactionLatencyInQueue.WithLabelValues(fmt.Sprint(paramtable.GetNodeID())).Observe(float64(durInQueue.Milliseconds()))
|
||||||
|
|
||||||
return pack, nil
|
return pack, nil
|
||||||
|
@ -149,6 +149,26 @@ var (
|
|||||||
Buckets: buckets,
|
Buckets: buckets,
|
||||||
}, []string{segmentFileTypeLabelName})
|
}, []string{segmentFileTypeLabelName})
|
||||||
|
|
||||||
|
/* garbage collector related metrics */
|
||||||
|
|
||||||
|
// GarbageCollectorListLatency records the latency of listing storage objects during a gc scan.
|
||||||
|
GarbageCollectorListLatency = prometheus.NewHistogramVec(
|
||||||
|
prometheus.HistogramOpts{
|
||||||
|
Namespace: milvusNamespace,
|
||||||
|
Subsystem: typeutil.DataCoordRole,
|
||||||
|
Name: "gc_list_latency",
|
||||||
|
Help: "latency of list objects in storage while garbage collecting (in milliseconds)",
|
||||||
|
Buckets: longTaskBuckets,
|
||||||
|
}, []string{nodeIDLabelName, segmentFileTypeLabelName})
|
||||||
|
|
||||||
|
GarbageCollectorRunCount = prometheus.NewCounterVec(
|
||||||
|
prometheus.CounterOpts{
|
||||||
|
Namespace: milvusNamespace,
|
||||||
|
Subsystem: typeutil.DataCoordRole,
|
||||||
|
Name: "gc_run_count",
|
||||||
|
Help: "garbage collection running count",
|
||||||
|
}, []string{nodeIDLabelName})
|
||||||
|
|
||||||
/* hard to implement, commented now
|
/* hard to implement, commented now
|
||||||
DataCoordSegmentSizeRatio = prometheus.NewHistogramVec(
|
DataCoordSegmentSizeRatio = prometheus.NewHistogramVec(
|
||||||
prometheus.HistogramOpts{
|
prometheus.HistogramOpts{
|
||||||
|
@ -154,7 +154,7 @@ var (
|
|||||||
Subsystem: typeutil.DataNodeRole,
|
Subsystem: typeutil.DataNodeRole,
|
||||||
Name: "compaction_latency",
|
Name: "compaction_latency",
|
||||||
Help: "latency of compaction operation",
|
Help: "latency of compaction operation",
|
||||||
Buckets: []float64{0.001, 0.1, 0.5, 1, 5, 10, 20, 50, 100, 250, 500, 1000, 3600, 5000, 10000}, // unit seconds
|
Buckets: longTaskBuckets,
|
||||||
}, []string{
|
}, []string{
|
||||||
nodeIDLabelName,
|
nodeIDLabelName,
|
||||||
})
|
})
|
||||||
|
@ -95,6 +95,9 @@ var (
|
|||||||
// [1 2 4 8 16 32 64 128 256 512 1024 2048 4096 8192 16384 32768 65536 1.31072e+05]
|
// [1 2 4 8 16 32 64 128 256 512 1024 2048 4096 8192 16384 32768 65536 1.31072e+05]
|
||||||
buckets = prometheus.ExponentialBuckets(1, 2, 18)
|
buckets = prometheus.ExponentialBuckets(1, 2, 18)
|
||||||
|
|
||||||
|
// longTaskBuckets defines histogram bucket boundaries for long-running task durations, in milliseconds.
|
||||||
|
longTaskBuckets = []float64{1, 100, 500, 1000, 5000, 10000, 20000, 50000, 100000, 250000, 500000, 1000000, 3600000, 5000000, 10000000} // unit milliseconds
|
||||||
|
|
||||||
NumNodes = prometheus.NewGaugeVec(
|
NumNodes = prometheus.NewGaugeVec(
|
||||||
prometheus.GaugeOpts{
|
prometheus.GaugeOpts{
|
||||||
Namespace: milvusNamespace,
|
Namespace: milvusNamespace,
|
||||||
|
Loading…
Reference in New Issue
Block a user