Add disk metric info (#25675)

Signed-off-by: xige-16 <xi.ge@zilliz.com>
This commit is contained in:
xige-16 2023-08-11 10:35:42 +08:00 committed by GitHub
parent 248bbc749d
commit 1971d98897
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 26 additions and 6 deletions

View File

@ -810,6 +810,10 @@ func (loader *segmentLoader) checkSegmentSize(ctx context.Context, segmentLoadIn
zap.Int64("collectionID", segmentLoadInfos[0].GetCollectionID()),
)
// toMB scales a size down by dividing by 1024 twice. The division is on
// uint64, so any fractional remainder is truncated. Within this function the
// results are logged under an "(in MiB)" message and used for the disk-usage
// gauge value.
toMB := func(mem uint64) uint64 {
return mem / 1024 / 1024
}
memUsage := hardware.GetUsedMemoryCount() + loader.committedResource.MemorySize
totalMem := hardware.GetMemoryCount()
if memUsage == 0 || totalMem == 0 {
@ -820,6 +824,8 @@ func (loader *segmentLoader) checkSegmentSize(ctx context.Context, segmentLoadIn
if err != nil {
return 0, 0, errors.Wrap(err, "get local used size failed")
}
metrics.QueryNodeDiskUsedSize.WithLabelValues(fmt.Sprint(paramtable.GetNodeID())).Set(float64(toMB(uint64(localDiskUsage))))
diskUsage := uint64(localDiskUsage) + loader.committedResource.DiskSize
maxSegmentSize := uint64(0)
@ -871,11 +877,6 @@ func (loader *segmentLoader) checkSegmentSize(ctx context.Context, segmentLoadIn
}
mmapEnabled := len(paramtable.Get().QueryNodeCfg.MmapDirPath.GetValue()) > 0
toMB := func(mem uint64) uint64 {
return mem / 1024 / 1024
}
log.Info("predict memory and disk usage while loading (in MiB)",
zap.Uint64("maxSegmentSize", toMB(maxSegmentSize)),
zap.Int("concurrency", concurrency),

View File

@ -258,7 +258,15 @@ func (node *QueryNode) Init() error {
node.factory.Init(paramtable.Get())
localChunkManager := storage.NewLocalChunkManager(storage.RootPath(paramtable.Get().LocalStorageCfg.Path.GetValue()))
localRootPath := paramtable.Get().LocalStorageCfg.Path.GetValue()
localChunkManager := storage.NewLocalChunkManager(storage.RootPath(localRootPath))
localUsedSize, err := segments.GetLocalUsedSize(localRootPath)
if err != nil {
log.Warn("get local used size failed", zap.Error(err))
initError = err
return
}
metrics.QueryNodeDiskUsedSize.WithLabelValues(fmt.Sprint(paramtable.GetNodeID())).Set(float64(localUsedSize / 1024 / 1024))
remoteChunkManager, err := node.factory.NewPersistentStorageChunkManager(node.ctx)
if err != nil {
log.Warn("failed to init remote chunk manager", zap.Error(err))

View File

@ -432,6 +432,16 @@ var (
}, []string{
nodeIDLabelName,
})
// QueryNodeDiskUsedSize is a gauge, labeled by node ID, that records the disk
// space used on a query node. Its help text states the unit as MB.
// NOTE(review): the setters visible in this change divide the measured size by
// 1024 twice before calling Set, which is MiB rather than decimal MB — confirm
// which unit label is intended.
QueryNodeDiskUsedSize = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Namespace: milvusNamespace,
Subsystem: typeutil.QueryNodeRole,
Name: "disk_used_size",
Help: "disk used size(MB)",
}, []string{
nodeIDLabelName,
})
)
// RegisterQueryNode registers QueryNode metrics
@ -470,6 +480,7 @@ func RegisterQueryNode(registry *prometheus.Registry) {
registry.MustRegister(QueryNodeMsgDispatcherTtLag)
registry.MustRegister(QueryNodeSegmentSearchLatencyPerVector)
registry.MustRegister(QueryNodeWatchDmlChannelLatency)
registry.MustRegister(QueryNodeDiskUsedSize)
registry.MustRegister(QueryNodeProcessCost)
registry.MustRegister(QueryNodeWaitProcessingMsgCount)
}