diff --git a/internal/indexnode/task.go b/internal/indexnode/task.go index 0a96b904c1..1c26eab006 100644 --- a/internal/indexnode/task.go +++ b/internal/indexnode/task.go @@ -228,7 +228,7 @@ func (it *indexBuildTask) LoadData(ctx context.Context) error { } loadFieldDataLatency := it.tr.CtxRecord(ctx, "load field data done") - metrics.IndexNodeLoadFieldLatency.WithLabelValues(strconv.FormatInt(paramtable.GetNodeID(), 10)).Observe(float64(loadFieldDataLatency.Milliseconds())) + metrics.IndexNodeLoadFieldLatency.WithLabelValues(strconv.FormatInt(paramtable.GetNodeID(), 10)).Observe(loadFieldDataLatency.Seconds()) err = it.decodeBlobs(ctx, blobs) if err != nil { @@ -349,7 +349,7 @@ func (it *indexBuildTask) BuildIndex(ctx context.Context) error { } buildIndexLatency := it.tr.RecordSpan() - metrics.IndexNodeKnowhereBuildIndexLatency.WithLabelValues(strconv.FormatInt(paramtable.GetNodeID(), 10)).Observe(float64(buildIndexLatency.Milliseconds())) + metrics.IndexNodeKnowhereBuildIndexLatency.WithLabelValues(strconv.FormatInt(paramtable.GetNodeID(), 10)).Observe(buildIndexLatency.Seconds()) log.Ctx(ctx).Info("Successfully build index", zap.Int64("buildID", it.BuildID), zap.Int64("Collection", it.collectionID), zap.Int64("SegmentID", it.segmentID)) return nil @@ -369,7 +369,7 @@ func (it *indexBuildTask) SaveIndexFiles(ctx context.Context) error { return err } encodeIndexFileDur := it.tr.Record("index serialize and upload done") - metrics.IndexNodeEncodeIndexFileLatency.WithLabelValues(strconv.FormatInt(paramtable.GetNodeID(), 10)).Observe(float64(encodeIndexFileDur.Milliseconds())) + metrics.IndexNodeEncodeIndexFileLatency.WithLabelValues(strconv.FormatInt(paramtable.GetNodeID(), 10)).Observe(encodeIndexFileDur.Seconds()) // early release index for gc, and we can ensure that Delete is idempotent. 
gcIndex() @@ -388,7 +388,7 @@ func (it *indexBuildTask) SaveIndexFiles(ctx context.Context) error { it.node.storeIndexFilesAndStatistic(it.ClusterID, it.BuildID, saveFileKeys, it.serializedSize, &it.statistic) log.Ctx(ctx).Debug("save index files done", zap.Strings("IndexFiles", saveFileKeys)) saveIndexFileDur := it.tr.RecordSpan() - metrics.IndexNodeSaveIndexFileLatency.WithLabelValues(strconv.FormatInt(paramtable.GetNodeID(), 10)).Observe(float64(saveIndexFileDur.Milliseconds())) + metrics.IndexNodeSaveIndexFileLatency.WithLabelValues(strconv.FormatInt(paramtable.GetNodeID(), 10)).Observe(saveIndexFileDur.Seconds()) it.tr.Elapse("index building all done") log.Ctx(ctx).Info("Successfully save index files", zap.Int64("buildID", it.BuildID), zap.Int64("Collection", it.collectionID), zap.Int64("partition", it.partitionID), zap.Int64("SegmentId", it.segmentID)) @@ -435,8 +435,7 @@ func (it *indexBuildTask) decodeBlobs(ctx context.Context, blobs []*storage.Blob if err2 != nil { return err2 } - decodeDuration := it.tr.RecordSpan().Milliseconds() - metrics.IndexNodeDecodeFieldLatency.WithLabelValues(strconv.FormatInt(paramtable.GetNodeID(), 10)).Observe(float64(decodeDuration)) + metrics.IndexNodeDecodeFieldLatency.WithLabelValues(strconv.FormatInt(paramtable.GetNodeID(), 10)).Observe(it.tr.RecordSpan().Seconds()) if len(insertData.Data) != 1 { return errors.New("we expect only one field in deserialized insert data") diff --git a/internal/indexnode/task_scheduler.go b/internal/indexnode/task_scheduler.go index fe6b4edd4b..0a42493461 100644 --- a/internal/indexnode/task_scheduler.go +++ b/internal/indexnode/task_scheduler.go @@ -233,7 +233,7 @@ func (sched *TaskScheduler) processTask(t task, q TaskQueue) { } t.SetState(commonpb.IndexState_Finished, "") if indexBuildTask, ok := t.(*indexBuildTask); ok { - metrics.IndexNodeBuildIndexLatency.WithLabelValues(fmt.Sprint(paramtable.GetNodeID())).Observe(float64(indexBuildTask.tr.ElapseSpan().Milliseconds())) + 
metrics.IndexNodeBuildIndexLatency.WithLabelValues(fmt.Sprint(paramtable.GetNodeID())).Observe(indexBuildTask.tr.ElapseSpan().Seconds()) metrics.IndexNodeIndexTaskLatencyInQueue.WithLabelValues(fmt.Sprint(paramtable.GetNodeID())).Observe(float64(indexBuildTask.queueDur.Milliseconds())) } } diff --git a/pkg/metrics/indexnode_metrics.go b/pkg/metrics/indexnode_metrics.go index f85f59a116..3a02286307 100644 --- a/pkg/metrics/indexnode_metrics.go +++ b/pkg/metrics/indexnode_metrics.go @@ -23,6 +23,9 @@ import ( ) var ( + // unit: seconds; bucket bounds range from 1ms to 10000s (~2.8hrs) + indexBucket = []float64{0.001, 0.1, 0.5, 1, 5, 10, 20, 50, 100, 250, 500, 1000, 3600, 5000, 10000} + IndexNodeBuildIndexTaskCounter = prometheus.NewCounterVec( prometheus.CounterOpts{ Namespace: milvusNamespace, @@ -37,7 +40,7 @@ var ( Subsystem: typeutil.IndexNodeRole, Name: "load_field_latency", Help: "latency of loading the field data", - Buckets: buckets, + Buckets: indexBucket, }, []string{nodeIDLabelName}) IndexNodeDecodeFieldLatency = prometheus.NewHistogramVec( @@ -46,7 +49,7 @@ var ( Subsystem: typeutil.IndexNodeRole, Name: "decode_field_latency", Help: "latency of decode field data", - Buckets: buckets, + Buckets: indexBucket, }, []string{nodeIDLabelName}) IndexNodeKnowhereBuildIndexLatency = prometheus.NewHistogramVec( @@ -55,7 +58,7 @@ var ( Subsystem: typeutil.IndexNodeRole, Name: "knowhere_build_index_latency", Help: "latency of building the index by knowhere", - Buckets: buckets, + Buckets: indexBucket, }, []string{nodeIDLabelName}) IndexNodeEncodeIndexFileLatency = prometheus.NewHistogramVec( @@ -64,7 +67,7 @@ var ( Subsystem: typeutil.IndexNodeRole, Name: "encode_index_latency", Help: "latency of encoding the index file", - Buckets: buckets, + Buckets: indexBucket, }, []string{nodeIDLabelName}) IndexNodeSaveIndexFileLatency = prometheus.NewHistogramVec( @@ -73,7 +76,7 @@ var ( Subsystem: typeutil.IndexNodeRole, Name: "save_index_latency", Help: "latency of saving the index file", - Buckets: buckets, +
Buckets: indexBucket, }, []string{nodeIDLabelName}) IndexNodeIndexTaskLatencyInQueue = prometheus.NewHistogramVec( @@ -91,7 +94,7 @@ var ( Subsystem: typeutil.IndexNodeRole, Name: "build_index_latency", Help: "latency of build index for segment", - Buckets: buckets, + Buckets: indexBucket, }, []string{nodeIDLabelName}) )