enhance: Add metrics to monitor import throughput and imported rows (#36519)

issue: https://github.com/milvus-io/milvus/issues/36518

Signed-off-by: bigsheeper <yihao.dai@zilliz.com>
yihao.dai 2024-09-28 17:31:15 +08:00 committed by GitHub
parent acc9b5ada5
commit 80f25d497f
8 changed files with 41 additions and 2 deletions

View File

@@ -36,6 +36,7 @@ import (
"github.com/milvus-io/milvus/internal/storage"
"github.com/milvus-io/milvus/pkg/common"
"github.com/milvus-io/milvus/pkg/log"
"github.com/milvus-io/milvus/pkg/metrics"
"github.com/milvus-io/milvus/pkg/util/merr"
"github.com/milvus-io/milvus/pkg/util/typeutil"
)
@@ -77,6 +78,11 @@ func NewSyncTask(ctx context.Context,
return nil, err
}
segmentLevel := datapb.SegmentLevel_L1
if insertData == nil && deleteData != nil {
segmentLevel = datapb.SegmentLevel_L0
}
syncPack := &syncmgr.SyncPack{}
syncPack.WithInsertData([]*storage.InsertData{insertData}).
WithDeleteData(deleteData).
@@ -85,6 +91,8 @@
WithChannelName(vchannel).
WithSegmentID(segmentID).
WithTimeRange(ts, ts).
WithLevel(segmentLevel).
WithDataSource(metrics.BulkinsertDataSourceLabel).
WithBatchSize(int64(insertData.GetRowNum()))
return serializer.EncodeBuffer(ctx, syncPack)
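
As a rough, self-contained sketch (not the actual Milvus code) of the rule the import path now follows: a pack that carries only deletes is flushed as an L0 segment, everything else stays L1, and every pack built here is tagged with the bulkinsert data source so the flush metrics can tell import traffic apart from streaming writes. The helper name and the string constants below are hypothetical stand-ins for the datapb and metrics identifiers used in the diff.

package main

import "fmt"

// Hypothetical stand-ins for datapb.SegmentLevel_* and metrics.BulkinsertDataSourceLabel.
const (
    segmentLevelL0       = "L0"
    segmentLevelL1       = "L1"
    bulkinsertDataSource = "bulkinsert"
)

// classifyImportPack mirrors the logic added to NewSyncTask: delete-only data
// produces an L0 segment, and the import path is always labeled "bulkinsert".
func classifyImportPack(hasInsertData, hasDeleteData bool) (level, dataSource string) {
    level = segmentLevelL1
    if !hasInsertData && hasDeleteData {
        level = segmentLevelL0
    }
    return level, bulkinsertDataSource
}

func main() {
    level, source := classifyImportPack(false, true)
    fmt.Println(level, source) // L0 bulkinsert
}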

View File

@@ -117,3 +117,8 @@ func (t *SyncTask) WithLevel(level datapb.SegmentLevel) *SyncTask {
t.level = level
return t
}
func (t *SyncTask) WithDataSource(source string) *SyncTask {
t.dataSource = source
return t
}

View File

@@ -49,6 +49,7 @@ type SyncPack struct {
startPosition *msgpb.MsgPosition
checkpoint *msgpb.MsgPosition
batchSize int64 // batchSize is the row number of this sync task, not the total number of rows of the segment
dataSource string
isFlush bool
isDrop bool
// metadata
@@ -137,3 +138,8 @@ func (p *SyncPack) WithErrorHandler(handler func(err error)) *SyncPack {
p.errHandler = handler
return p
}
func (p *SyncPack) WithDataSource(source string) *SyncPack {
p.dataSource = source
return p
}

View File

@@ -174,6 +174,7 @@ func (s *storageV1Serializer) setTaskMeta(task *SyncTask, pack *SyncPack) {
WithStartPosition(pack.startPosition).
WithCheckpoint(pack.checkpoint).
WithLevel(pack.level).
WithDataSource(pack.dataSource).
WithTimeRange(pack.tsFrom, pack.tsTo).
WithMetaCache(s.metacache).
WithMetaWriter(s.metaWriter).

View File

@@ -55,6 +55,7 @@ type SyncTask struct {
pkField *schemapb.FieldSchema
startPosition *msgpb.MsgPosition
checkpoint *msgpb.MsgPosition
dataSource string
// batchSize is the row number of this sync task,
// not the total number of rows of the segment
batchSize int64
@@ -169,7 +170,8 @@ func (t *SyncTask) Run(ctx context.Context) (err error) {
totalSize += float64(len(t.deltaBlob.Value))
}
metrics.DataNodeFlushedSize.WithLabelValues(fmt.Sprint(paramtable.GetNodeID()), metrics.AllLabel, t.level.String()).Add(totalSize)
metrics.DataNodeFlushedSize.WithLabelValues(fmt.Sprint(paramtable.GetNodeID()), t.dataSource, t.level.String()).Add(totalSize)
metrics.DataNodeFlushedRows.WithLabelValues(fmt.Sprint(paramtable.GetNodeID()), t.dataSource).Add(float64(t.batchSize))
metrics.DataNodeSave2StorageLatency.WithLabelValues(fmt.Sprint(paramtable.GetNodeID()), t.level.String()).Observe(float64(t.tr.RecordSpan().Milliseconds()))
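
For illustration, a minimal sketch of the accounting pattern above, using the standard prometheus/client_golang counter API with simplified, hypothetical metric and label names: after each sync, flushed bytes are added to a size counter keyed by node ID, data source, and segment level, and the row count goes to the new rows counter keyed by node ID and data source.

package main

import (
    "fmt"

    "github.com/prometheus/client_golang/prometheus"
)

var (
    // Simplified analogue of DataNodeFlushedSize, now labeled by data source
    // instead of message type.
    flushedSize = prometheus.NewCounterVec(prometheus.CounterOpts{
        Name: "flushed_data_size",
        Help: "byte size of data flushed to storage",
    }, []string{"node_id", "data_source", "segment_level"})

    // Simplified analogue of the new DataNodeFlushedRows counter.
    flushedRows = prometheus.NewCounterVec(prometheus.CounterOpts{
        Name: "flushed_data_rows",
        Help: "num of rows flushed to storage",
    }, []string{"node_id", "data_source"})
)

// recordFlush mirrors the bookkeeping in SyncTask.Run: bytes and rows are
// attributed to the data source that produced the sync task.
func recordFlush(nodeID, dataSource, level string, bytes float64, rows int64) {
    flushedSize.WithLabelValues(nodeID, dataSource, level).Add(bytes)
    flushedRows.WithLabelValues(nodeID, dataSource).Add(float64(rows))
}

func main() {
    recordFlush("1", "bulkinsert", "L1", 4096, 128)
    recordFlush("1", "streaming", "L1", 1024, 32)
    fmt.Println("recorded two flushes")
}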

View File

@@ -603,6 +603,7 @@ func (wb *writeBufferBase) getSyncTask(ctx context.Context, segmentID int64) (sy
WithStartPosition(startPos).
WithTimeRange(tsFrom, tsTo).
WithLevel(segmentInfo.Level()).
WithDataSource(metrics.StreamingDataSourceLabel).
WithCheckpoint(wb.checkpoint).
WithBatchSize(batchSize).
WithErrorHandler(wb.errHandler)

View File

@@ -54,10 +54,21 @@ var (
Help: "byte size of data flushed to storage",
}, []string{
nodeIDLabelName,
msgTypeLabelName,
dataSourceLabelName,
segmentLevelLabelName,
})
DataNodeFlushedRows = prometheus.NewCounterVec(
prometheus.CounterOpts{
Namespace: milvusNamespace,
Subsystem: typeutil.DataNodeRole,
Name: "flushed_data_rows",
Help: "num of rows flushed to storage",
}, []string{
nodeIDLabelName,
dataSourceLabelName,
})
DataNodeNumProducers = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Namespace: milvusNamespace,
@@ -246,6 +257,7 @@ func RegisterDataNode(registry *prometheus.Registry) {
registry.MustRegister(DataNodeFlushBufferCount)
registry.MustRegister(DataNodeFlushReqCounter)
registry.MustRegister(DataNodeFlushedSize)
registry.MustRegister(DataNodeFlushedRows)
// compaction related
registry.MustRegister(DataNodeCompactionLatency)
registry.MustRegister(DataNodeCompactionLatencyInQueue)
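
A counter only shows up on the metrics endpoint once it is registered, which is what the RegisterDataNode change above adds for DataNodeFlushedRows. Below is a minimal, hypothetical sketch of the same register-and-expose pattern with prometheus/client_golang and promhttp; the metric here is a simplified stand-in, not the Milvus definition.

package main

import (
    "net/http"

    "github.com/prometheus/client_golang/prometheus"
    "github.com/prometheus/client_golang/prometheus/promhttp"
)

func main() {
    // Hypothetical stand-in for DataNodeFlushedRows.
    flushedRows := prometheus.NewCounterVec(prometheus.CounterOpts{
        Namespace: "milvus",
        Subsystem: "datanode",
        Name:      "flushed_data_rows",
        Help:      "num of rows flushed to storage",
    }, []string{"node_id", "data_source"})

    // Same pattern as RegisterDataNode: register the collector, then expose it.
    registry := prometheus.NewRegistry()
    registry.MustRegister(flushedRows)

    flushedRows.WithLabelValues("1", "bulkinsert").Add(100)

    http.Handle("/metrics", promhttp.HandlerFor(registry, promhttp.HandlerOpts{}))
    _ = http.ListenAndServe(":2112", nil)
}

Assuming the real namespace and subsystem resolve to milvus and datanode as in this sketch, a PromQL query such as sum by (data_source) (rate(milvus_datanode_flushed_data_rows[5m])) would separate bulk-import row throughput from streaming row throughput.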

View File

@@ -57,6 +57,9 @@ const (
FlushingSegmentLabel = "Flushing"
DroppedSegmentLabel = "Dropped"
StreamingDataSourceLabel = "streaming"
BulkinsertDataSourceLabel = "bulkinsert"
Leader = "OnLeader"
FromLeader = "FromLeader"
@@ -101,6 +104,7 @@ const (
cacheNameLabelName = "cache_name"
cacheStateLabelName = "cache_state"
indexCountLabelName = "indexed_field_count"
dataSourceLabelName = "data_source"
requestScope = "scope"
fullMethodLabelName = "full_method"
reduceLevelName = "reduce_level"
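
The streaming and bulkinsert values above are what the new data_source label takes, so a single flush counter splits cleanly by origin. A small test-style sketch, with a hypothetical simplified counter and the client_golang testutil helper, showing that each label value accumulates independently:

package main

import (
    "fmt"

    "github.com/prometheus/client_golang/prometheus"
    "github.com/prometheus/client_golang/prometheus/testutil"
)

func main() {
    // Hypothetical counter carrying the same data_source label as the diff.
    rows := prometheus.NewCounterVec(prometheus.CounterOpts{
        Name: "flushed_data_rows",
        Help: "num of rows flushed to storage",
    }, []string{"data_source"})

    rows.WithLabelValues("streaming").Add(32)
    rows.WithLabelValues("bulkinsert").Add(128)

    // Each data source is a separate time series under the same metric name.
    fmt.Println(testutil.ToFloat64(rows.WithLabelValues("streaming")))  // 32
    fmt.Println(testutil.ToFloat64(rows.WithLabelValues("bulkinsert"))) // 128
}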