2021-10-25 19:46:28 +08:00
// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
2021-06-22 10:42:07 +08:00
package datacoord
2021-05-25 15:35:37 +08:00
import (
"context"
"fmt"
"strconv"
2021-05-28 09:55:21 +08:00
"sync/atomic"
2021-11-05 22:25:00 +08:00
"time"
2021-05-25 15:35:37 +08:00
2021-09-22 19:33:54 +08:00
"github.com/milvus-io/milvus/internal/util/trace"
2021-05-25 15:35:37 +08:00
"github.com/milvus-io/milvus/internal/log"
"github.com/milvus-io/milvus/internal/proto/commonpb"
"github.com/milvus-io/milvus/internal/proto/datapb"
"github.com/milvus-io/milvus/internal/proto/internalpb"
"github.com/milvus-io/milvus/internal/proto/milvuspb"
2021-09-01 10:13:15 +08:00
"github.com/milvus-io/milvus/internal/util/metricsinfo"
2021-05-25 15:35:37 +08:00
"go.uber.org/zap"
)
2021-09-06 11:12:42 +08:00
// checks whether server in Healthy State
2021-05-28 09:55:21 +08:00
func ( s * Server ) isClosed ( ) bool {
2021-06-29 10:46:13 +08:00
return atomic . LoadInt64 ( & s . isServing ) != ServerStateHealthy
2021-05-25 15:35:37 +08:00
}
2021-09-06 11:12:42 +08:00
// GetTimeTickChannel legacy API, returns time tick channel name
2021-05-25 15:35:37 +08:00
func ( s * Server ) GetTimeTickChannel ( ctx context . Context ) ( * milvuspb . StringResponse , error ) {
return & milvuspb . StringResponse {
Status : & commonpb . Status {
ErrorCode : commonpb . ErrorCode_Success ,
} ,
Value : Params . TimeTickChannelName ,
} , nil
}
2021-09-06 11:12:42 +08:00
// GetStatisticsChannel legacy API, returns statistics channel name
2021-05-25 15:35:37 +08:00
func ( s * Server ) GetStatisticsChannel ( ctx context . Context ) ( * milvuspb . StringResponse , error ) {
return & milvuspb . StringResponse {
Status : & commonpb . Status {
ErrorCode : commonpb . ErrorCode_Success ,
} ,
Value : Params . StatisticsChannelName ,
} , nil
}
2021-09-06 11:12:42 +08:00
// Flush notify segment to flush
// this api only guarantees all the segments requested is sealed
// these segments will be flushed only after the Flush policy is fulfilled
2021-06-23 16:56:11 +08:00
func ( s * Server ) Flush ( ctx context . Context , req * datapb . FlushRequest ) ( * datapb . FlushResponse , error ) {
2021-07-28 11:43:22 +08:00
log . Debug ( "receive flush request" , zap . Int64 ( "dbID" , req . GetDbID ( ) ) , zap . Int64 ( "collectionID" , req . GetCollectionID ( ) ) )
2021-09-22 19:33:54 +08:00
sp , ctx := trace . StartSpanFromContextWithOperationName ( ctx , "DataCoord-Flush" )
defer sp . Finish ( )
2021-06-23 16:56:11 +08:00
resp := & datapb . FlushResponse {
Status : & commonpb . Status {
ErrorCode : commonpb . ErrorCode_UnexpectedError ,
Reason : "" ,
} ,
DbID : 0 ,
CollectionID : 0 ,
SegmentIDs : [ ] int64 { } ,
2021-05-28 09:55:21 +08:00
}
if s . isClosed ( ) {
2021-06-23 16:56:11 +08:00
resp . Status . Reason = serverNotServingErrMsg
2021-05-28 09:55:21 +08:00
return resp , nil
2021-05-25 15:35:37 +08:00
}
2021-06-23 16:56:11 +08:00
sealedSegments , err := s . segmentManager . SealAllSegments ( ctx , req . CollectionID )
if err != nil {
2021-07-28 11:43:22 +08:00
resp . Status . Reason = fmt . Sprintf ( "failed to flush %d, %s" , req . CollectionID , err )
2021-05-28 09:55:21 +08:00
return resp , nil
2021-05-25 15:35:37 +08:00
}
2021-09-30 18:12:25 +08:00
log . Debug ( "flush response with segments" ,
zap . Int64 ( "collectionID" , req . GetCollectionID ( ) ) ,
zap . Any ( "segments" , sealedSegments ) )
2021-06-23 16:56:11 +08:00
resp . Status . ErrorCode = commonpb . ErrorCode_Success
resp . DbID = req . GetDbID ( )
resp . CollectionID = req . GetCollectionID ( )
resp . SegmentIDs = sealedSegments
return resp , nil
2021-05-25 15:35:37 +08:00
}
2021-09-06 11:12:42 +08:00
// AssignSegmentID applies for segment ids and make allocation for records
2021-05-25 15:35:37 +08:00
func ( s * Server ) AssignSegmentID ( ctx context . Context , req * datapb . AssignSegmentIDRequest ) ( * datapb . AssignSegmentIDResponse , error ) {
2021-05-28 09:55:21 +08:00
if s . isClosed ( ) {
2021-05-25 15:35:37 +08:00
return & datapb . AssignSegmentIDResponse {
Status : & commonpb . Status {
2021-06-22 18:24:08 +08:00
Reason : serverNotServingErrMsg ,
2021-05-25 15:35:37 +08:00
ErrorCode : commonpb . ErrorCode_UnexpectedError ,
} ,
} , nil
}
assigns := make ( [ ] * datapb . SegmentIDAssignment , 0 , len ( req . SegmentIDRequests ) )
for _ , r := range req . SegmentIDRequests {
2021-07-28 11:43:22 +08:00
log . Debug ( "handle assign segment request" ,
2021-06-08 19:25:37 +08:00
zap . Int64 ( "collectionID" , r . GetCollectionID ( ) ) ,
zap . Int64 ( "partitionID" , r . GetPartitionID ( ) ) ,
zap . String ( "channelName" , r . GetChannelName ( ) ) ,
zap . Uint32 ( "count" , r . GetCount ( ) ) )
2021-07-07 14:02:01 +08:00
if coll := s . meta . GetCollection ( r . CollectionID ) ; coll == nil {
2021-06-21 17:28:03 +08:00
if err := s . loadCollectionFromRootCoord ( ctx , r . CollectionID ) ; err != nil {
log . Error ( "load collection from rootcoord error" ,
2021-05-25 15:35:37 +08:00
zap . Int64 ( "collectionID" , r . CollectionID ) ,
zap . Error ( err ) )
continue
}
}
2021-07-23 21:58:33 +08:00
2021-07-12 11:03:52 +08:00
s . cluster . Watch ( r . ChannelName , r . CollectionID )
2021-05-29 10:47:29 +08:00
2021-07-23 21:58:33 +08:00
allocations , err := s . segmentManager . AllocSegment ( ctx ,
2021-05-25 15:35:37 +08:00
r . CollectionID , r . PartitionID , r . ChannelName , int64 ( r . Count ) )
if err != nil {
2021-07-23 21:58:33 +08:00
log . Warn ( "failed to alloc segment" , zap . Any ( "request" , r ) , zap . Error ( err ) )
2021-05-25 15:35:37 +08:00
continue
}
2021-07-23 21:58:33 +08:00
log . Debug ( "Assign segment success" , zap . Any ( "assignments" , allocations ) )
2021-06-08 19:25:37 +08:00
2021-07-23 21:58:33 +08:00
for _ , allocation := range allocations {
result := & datapb . SegmentIDAssignment {
SegID : allocation . SegmentID ,
ChannelName : r . ChannelName ,
Count : uint32 ( allocation . NumOfRows ) ,
CollectionID : r . CollectionID ,
PartitionID : r . PartitionID ,
ExpireTime : allocation . ExpireTime ,
Status : & commonpb . Status {
ErrorCode : commonpb . ErrorCode_Success ,
Reason : "" ,
} ,
}
assigns = append ( assigns , result )
2021-05-25 15:35:37 +08:00
}
}
return & datapb . AssignSegmentIDResponse {
Status : & commonpb . Status {
ErrorCode : commonpb . ErrorCode_Success ,
} ,
SegIDAssignments : assigns ,
} , nil
}
2021-05-26 19:06:56 +08:00
2021-09-06 11:12:42 +08:00
// GetSegmentStates returns segments state
2021-05-25 15:35:37 +08:00
func ( s * Server ) GetSegmentStates ( ctx context . Context , req * datapb . GetSegmentStatesRequest ) ( * datapb . GetSegmentStatesResponse , error ) {
resp := & datapb . GetSegmentStatesResponse {
Status : & commonpb . Status {
ErrorCode : commonpb . ErrorCode_UnexpectedError ,
} ,
}
2021-05-28 09:55:21 +08:00
if s . isClosed ( ) {
2021-06-22 18:24:08 +08:00
resp . Status . Reason = serverNotServingErrMsg
2021-05-25 15:35:37 +08:00
return resp , nil
}
for _ , segmentID := range req . SegmentIDs {
state := & datapb . SegmentStateInfo {
Status : & commonpb . Status { } ,
SegmentID : segmentID ,
}
2021-07-07 14:02:01 +08:00
segmentInfo := s . meta . GetSegment ( segmentID )
if segmentInfo == nil {
2021-05-25 15:35:37 +08:00
state . Status . ErrorCode = commonpb . ErrorCode_UnexpectedError
2021-07-28 11:43:22 +08:00
state . Status . Reason = fmt . Sprintf ( "failed to get segment %d" , segmentID )
2021-05-25 15:35:37 +08:00
} else {
state . Status . ErrorCode = commonpb . ErrorCode_Success
2021-06-15 11:06:42 +08:00
state . State = segmentInfo . GetState ( )
state . StartPosition = segmentInfo . GetStartPosition ( )
2021-05-25 15:35:37 +08:00
}
resp . States = append ( resp . States , state )
}
resp . Status . ErrorCode = commonpb . ErrorCode_Success
return resp , nil
}
2021-09-06 11:12:42 +08:00
// GetInsertBinlogPaths returns binlog paths info for requested segments
2021-05-25 15:35:37 +08:00
func ( s * Server ) GetInsertBinlogPaths ( ctx context . Context , req * datapb . GetInsertBinlogPathsRequest ) ( * datapb . GetInsertBinlogPathsResponse , error ) {
resp := & datapb . GetInsertBinlogPathsResponse {
Status : & commonpb . Status {
ErrorCode : commonpb . ErrorCode_UnexpectedError ,
} ,
}
2021-06-09 15:02:48 +08:00
if s . isClosed ( ) {
2021-06-22 18:24:08 +08:00
resp . Status . Reason = serverNotServingErrMsg
2021-06-09 15:02:48 +08:00
return resp , nil
}
2021-08-19 13:00:12 +08:00
segment := s . meta . GetSegment ( req . GetSegmentID ( ) )
if segment == nil {
resp . Status . Reason = "segment not found"
2021-05-25 15:35:37 +08:00
return resp , nil
}
2021-08-19 13:00:12 +08:00
binlogs := segment . GetBinlogs ( )
fids := make ( [ ] UniqueID , 0 , len ( binlogs ) )
paths := make ( [ ] * internalpb . StringList , 0 , len ( binlogs ) )
for _ , field := range binlogs {
fids = append ( fids , field . GetFieldID ( ) )
paths = append ( paths , & internalpb . StringList { Values : field . GetBinlogs ( ) } )
2021-05-25 15:35:37 +08:00
}
resp . Status . ErrorCode = commonpb . ErrorCode_Success
resp . FieldIDs = fids
resp . Paths = paths
return resp , nil
}
2021-09-06 11:12:42 +08:00
// GetCollectionStatistics returns statistics for collection
// for now only row count is returned
2021-05-25 15:35:37 +08:00
func ( s * Server ) GetCollectionStatistics ( ctx context . Context , req * datapb . GetCollectionStatisticsRequest ) ( * datapb . GetCollectionStatisticsResponse , error ) {
resp := & datapb . GetCollectionStatisticsResponse {
Status : & commonpb . Status {
ErrorCode : commonpb . ErrorCode_UnexpectedError ,
} ,
}
2021-06-09 15:02:48 +08:00
if s . isClosed ( ) {
2021-06-22 18:24:08 +08:00
resp . Status . Reason = serverNotServingErrMsg
2021-06-09 15:02:48 +08:00
return resp , nil
}
2021-07-07 14:02:01 +08:00
nums := s . meta . GetNumRowsOfCollection ( req . CollectionID )
2021-05-25 15:35:37 +08:00
resp . Status . ErrorCode = commonpb . ErrorCode_Success
resp . Stats = append ( resp . Stats , & commonpb . KeyValuePair { Key : "row_count" , Value : strconv . FormatInt ( nums , 10 ) } )
return resp , nil
}
2021-09-06 11:12:42 +08:00
// GetPartitionStatistics return statistics for parition
// for now only row count is returned
2021-05-25 15:35:37 +08:00
func ( s * Server ) GetPartitionStatistics ( ctx context . Context , req * datapb . GetPartitionStatisticsRequest ) ( * datapb . GetPartitionStatisticsResponse , error ) {
resp := & datapb . GetPartitionStatisticsResponse {
Status : & commonpb . Status {
ErrorCode : commonpb . ErrorCode_UnexpectedError ,
} ,
}
2021-06-09 15:02:48 +08:00
if s . isClosed ( ) {
2021-06-22 18:24:08 +08:00
resp . Status . Reason = serverNotServingErrMsg
2021-06-09 15:02:48 +08:00
return resp , nil
}
2021-07-07 14:02:01 +08:00
nums := s . meta . GetNumRowsOfPartition ( req . CollectionID , req . PartitionID )
2021-05-25 15:35:37 +08:00
resp . Status . ErrorCode = commonpb . ErrorCode_Success
resp . Stats = append ( resp . Stats , & commonpb . KeyValuePair { Key : "row_count" , Value : strconv . FormatInt ( nums , 10 ) } )
return resp , nil
}
2021-09-06 11:12:42 +08:00
// GetSegmentInfoChannel legacy API, returns segment info statistics channel
2021-05-25 15:35:37 +08:00
func ( s * Server ) GetSegmentInfoChannel ( ctx context . Context ) ( * milvuspb . StringResponse , error ) {
return & milvuspb . StringResponse {
Status : & commonpb . Status {
ErrorCode : commonpb . ErrorCode_Success ,
} ,
Value : Params . SegmentInfoChannelName ,
} , nil
}
2021-09-06 11:12:42 +08:00
// GetSegmentInfo returns segment info requested, status, row count, etc included
2021-05-25 15:35:37 +08:00
func ( s * Server ) GetSegmentInfo ( ctx context . Context , req * datapb . GetSegmentInfoRequest ) ( * datapb . GetSegmentInfoResponse , error ) {
resp := & datapb . GetSegmentInfoResponse {
Status : & commonpb . Status {
ErrorCode : commonpb . ErrorCode_UnexpectedError ,
} ,
}
2021-05-28 09:55:21 +08:00
if s . isClosed ( ) {
2021-06-22 18:24:08 +08:00
resp . Status . Reason = serverNotServingErrMsg
2021-05-25 15:35:37 +08:00
return resp , nil
}
infos := make ( [ ] * datapb . SegmentInfo , 0 , len ( req . SegmentIDs ) )
for _ , id := range req . SegmentIDs {
2021-07-07 14:02:01 +08:00
info := s . meta . GetSegment ( id )
if info == nil {
2021-07-28 11:43:22 +08:00
resp . Status . Reason = fmt . Sprintf ( "failed to get segment %d" , id )
2021-05-25 15:35:37 +08:00
return resp , nil
}
2021-07-12 17:24:25 +08:00
infos = append ( infos , info . SegmentInfo )
2021-05-25 15:35:37 +08:00
}
resp . Status . ErrorCode = commonpb . ErrorCode_Success
resp . Infos = infos
return resp , nil
}
2021-09-06 11:12:42 +08:00
// SaveBinlogPaths update segment related binlog path
// works for Checkpoints and Flush
2021-05-25 15:35:37 +08:00
func ( s * Server ) SaveBinlogPaths ( ctx context . Context , req * datapb . SaveBinlogPathsRequest ) ( * commonpb . Status , error ) {
resp := & commonpb . Status {
ErrorCode : commonpb . ErrorCode_UnexpectedError ,
}
2021-10-14 15:44:34 +08:00
2021-05-28 09:55:21 +08:00
if s . isClosed ( ) {
2021-06-22 18:24:08 +08:00
resp . Reason = serverNotServingErrMsg
2021-05-25 15:35:37 +08:00
return resp , nil
}
2021-10-14 15:44:34 +08:00
2021-07-28 11:43:22 +08:00
log . Debug ( "receive SaveBinlogPaths request" ,
2021-06-08 19:25:37 +08:00
zap . Int64 ( "collectionID" , req . GetCollectionID ( ) ) ,
2021-06-18 16:02:05 +08:00
zap . Int64 ( "segmentID" , req . GetSegmentID ( ) ) ,
2021-10-13 20:54:33 +08:00
zap . Bool ( "isFlush" , req . GetFlushed ( ) ) ,
2021-06-18 16:02:05 +08:00
zap . Any ( "checkpoints" , req . GetCheckPoints ( ) ) )
2021-05-25 15:35:37 +08:00
2021-10-14 15:44:34 +08:00
// validate
nodeID := req . GetBase ( ) . GetSourceID ( )
segmentID := req . GetSegmentID ( )
segment := s . meta . GetSegment ( segmentID )
if segment == nil {
FailResponse ( resp , fmt . Sprintf ( "failed to get segment %d" , segmentID ) )
log . Error ( "failed to get segment" , zap . Int64 ( "segmentID" , segmentID ) )
return resp , nil
}
channel := segment . GetInsertChannel ( )
if ! s . channelManager . Match ( nodeID , channel ) {
FailResponse ( resp , fmt . Sprintf ( "channel %s is not watched on node %d" , channel , nodeID ) )
log . Warn ( "node is not matched with channel" , zap . String ( "channel" , channel ) , zap . Int64 ( "nodeID" , nodeID ) )
}
2021-06-04 11:45:45 +08:00
// set segment to SegmentState_Flushing and save binlogs and checkpoints
2021-08-19 13:00:12 +08:00
err := s . meta . UpdateFlushSegmentsInfo ( req . GetSegmentID ( ) , req . GetFlushed ( ) ,
2021-10-19 14:32:41 +08:00
req . GetField2BinlogPaths ( ) , req . GetField2StatslogPaths ( ) , req . GetDeltalogs ( ) ,
req . GetCheckPoints ( ) , req . GetStartPositions ( ) )
2021-05-25 15:35:37 +08:00
if err != nil {
2021-07-28 11:43:22 +08:00
log . Error ( "save binlog and checkpoints failed" ,
2021-06-08 19:25:37 +08:00
zap . Int64 ( "segmentID" , req . GetSegmentID ( ) ) ,
zap . Error ( err ) )
2021-05-25 15:35:37 +08:00
resp . Reason = err . Error ( )
2021-05-27 18:45:24 +08:00
return resp , nil
2021-05-25 15:35:37 +08:00
}
2021-10-14 15:44:34 +08:00
2021-07-28 11:43:22 +08:00
log . Debug ( "flush segment with meta" , zap . Int64 ( "id" , req . SegmentID ) ,
2021-08-19 13:00:12 +08:00
zap . Any ( "meta" , req . GetField2BinlogPaths ( ) ) )
2021-05-25 15:35:37 +08:00
2021-06-04 11:45:45 +08:00
if req . Flushed {
s . segmentManager . DropSegment ( ctx , req . SegmentID )
s . flushCh <- req . SegmentID
2021-11-05 22:25:00 +08:00
if Params . EnableCompaction {
cctx , cancel := context . WithTimeout ( s . ctx , 5 * time . Second )
defer cancel ( )
2021-11-08 21:45:00 +08:00
tt , err := getTimetravelReverseTime ( cctx , s . allocator )
2021-11-05 22:25:00 +08:00
if err == nil {
if err = s . compactionTrigger . triggerSingleCompaction ( segment . GetCollectionID ( ) , segment . GetPartitionID ( ) ,
segmentID , segment . GetInsertChannel ( ) , tt ) ; err != nil {
log . Warn ( "failed to trigger single compaction" , zap . Int64 ( "segmentID" , segmentID ) )
}
}
}
2021-06-04 11:45:45 +08:00
}
2021-05-25 15:35:37 +08:00
resp . ErrorCode = commonpb . ErrorCode_Success
return resp , nil
}
2021-05-28 09:55:21 +08:00
2021-09-24 21:33:55 +08:00
// GetComponentStates returns DataCoord's current state
2021-05-28 09:55:21 +08:00
func ( s * Server ) GetComponentStates ( ctx context . Context ) ( * internalpb . ComponentStates , error ) {
2021-06-02 15:11:17 +08:00
resp := & internalpb . ComponentStates {
State : & internalpb . ComponentInfo {
NodeID : Params . NodeID ,
2021-06-21 18:22:13 +08:00
Role : "datacoord" ,
2021-06-02 15:11:17 +08:00
StateCode : 0 ,
} ,
Status : & commonpb . Status {
ErrorCode : commonpb . ErrorCode_Success ,
Reason : "" ,
} ,
}
state := atomic . LoadInt64 ( & s . isServing )
switch state {
2021-06-29 10:46:13 +08:00
case ServerStateInitializing :
2021-06-02 15:11:17 +08:00
resp . State . StateCode = internalpb . StateCode_Initializing
2021-06-29 10:46:13 +08:00
case ServerStateHealthy :
2021-06-02 15:11:17 +08:00
resp . State . StateCode = internalpb . StateCode_Healthy
default :
resp . State . StateCode = internalpb . StateCode_Abnormal
}
return resp , nil
2021-05-28 09:55:21 +08:00
}
2021-09-06 11:12:42 +08:00
// GetRecoveryInfo get recovery info for segment
2021-06-07 09:47:36 +08:00
func ( s * Server ) GetRecoveryInfo ( ctx context . Context , req * datapb . GetRecoveryInfoRequest ) ( * datapb . GetRecoveryInfoResponse , error ) {
collectionID := req . GetCollectionID ( )
partitionID := req . GetPartitionID ( )
2021-07-28 11:43:22 +08:00
log . Info ( "receive get recovery info request" ,
2021-06-08 19:25:37 +08:00
zap . Int64 ( "collectionID" , collectionID ) ,
2021-06-07 09:47:36 +08:00
zap . Int64 ( "partitionID" , partitionID ) )
resp := & datapb . GetRecoveryInfoResponse {
Status : & commonpb . Status {
ErrorCode : commonpb . ErrorCode_UnexpectedError ,
} ,
}
2021-06-09 15:02:48 +08:00
if s . isClosed ( ) {
2021-06-22 18:24:08 +08:00
resp . Status . Reason = serverNotServingErrMsg
2021-06-09 15:02:48 +08:00
return resp , nil
}
2021-11-05 22:25:00 +08:00
segmentIDs := s . meta . GetSegmentsIDOfPartition ( collectionID , partitionID )
2021-06-07 09:47:36 +08:00
segment2Binlogs := make ( map [ UniqueID ] [ ] * datapb . FieldBinlog )
2021-10-22 14:31:13 +08:00
segment2StatsBinlogs := make ( map [ UniqueID ] [ ] * datapb . FieldBinlog )
segment2DeltaBinlogs := make ( map [ UniqueID ] [ ] * datapb . DeltaLogInfo )
2021-09-07 11:35:18 +08:00
segmentsNumOfRows := make ( map [ UniqueID ] int64 )
2021-10-22 14:31:13 +08:00
flushedIDs := make ( map [ int64 ] struct { } )
2021-06-07 09:47:36 +08:00
for _ , id := range segmentIDs {
2021-07-07 14:02:01 +08:00
segment := s . meta . GetSegment ( id )
if segment == nil {
2021-07-28 11:43:22 +08:00
errMsg := fmt . Sprintf ( "failed to get segment %d" , id )
2021-07-07 14:02:01 +08:00
log . Error ( errMsg )
resp . Status . Reason = errMsg
2021-06-15 19:23:55 +08:00
return resp , nil
}
if segment . State != commonpb . SegmentState_Flushed && segment . State != commonpb . SegmentState_Flushing {
continue
}
2021-10-22 14:31:13 +08:00
_ , ok := flushedIDs [ id ]
if ! ok {
flushedIDs [ id ] = struct { } { }
}
2021-06-15 19:23:55 +08:00
2021-08-19 13:00:12 +08:00
binlogs := segment . GetBinlogs ( )
2021-06-07 09:47:36 +08:00
field2Binlog := make ( map [ UniqueID ] [ ] string )
2021-08-19 13:00:12 +08:00
for _ , field := range binlogs {
field2Binlog [ field . GetFieldID ( ) ] = append ( field2Binlog [ field . GetFieldID ( ) ] , field . GetBinlogs ( ) ... )
2021-06-07 09:47:36 +08:00
}
for f , paths := range field2Binlog {
fieldBinlogs := & datapb . FieldBinlog {
FieldID : f ,
Binlogs : paths ,
}
segment2Binlogs [ id ] = append ( segment2Binlogs [ id ] , fieldBinlogs )
}
2021-09-07 11:35:18 +08:00
segmentsNumOfRows [ id ] = segment . NumOfRows
2021-10-22 14:31:13 +08:00
statsBinlogs := segment . GetStatslogs ( )
field2StatsBinlog := make ( map [ UniqueID ] [ ] string )
for _ , field := range statsBinlogs {
field2StatsBinlog [ field . GetFieldID ( ) ] = append ( field2StatsBinlog [ field . GetFieldID ( ) ] , field . GetBinlogs ( ) ... )
}
for f , paths := range field2StatsBinlog {
fieldBinlogs := & datapb . FieldBinlog {
FieldID : f ,
Binlogs : paths ,
}
segment2StatsBinlogs [ id ] = append ( segment2StatsBinlogs [ id ] , fieldBinlogs )
}
segment2DeltaBinlogs [ id ] = append ( segment2DeltaBinlogs [ id ] , segment . GetDeltalogs ( ) ... )
2021-06-07 09:47:36 +08:00
}
binlogs := make ( [ ] * datapb . SegmentBinlogs , 0 , len ( segment2Binlogs ) )
2021-10-22 14:31:13 +08:00
for segmentID := range flushedIDs {
2021-06-07 09:47:36 +08:00
sbl := & datapb . SegmentBinlogs {
SegmentID : segmentID ,
2021-09-07 11:35:18 +08:00
NumOfRows : segmentsNumOfRows [ segmentID ] ,
2021-10-22 14:31:13 +08:00
FieldBinlogs : segment2Binlogs [ segmentID ] ,
Statslogs : segment2StatsBinlogs [ segmentID ] ,
Deltalogs : segment2DeltaBinlogs [ segmentID ] ,
2021-06-07 09:47:36 +08:00
}
binlogs = append ( binlogs , sbl )
}
2021-06-21 17:28:03 +08:00
dresp , err := s . rootCoordClient . DescribeCollection ( s . ctx , & milvuspb . DescribeCollectionRequest {
2021-06-07 09:47:36 +08:00
Base : & commonpb . MsgBase {
MsgType : commonpb . MsgType_DescribeCollection ,
SourceID : Params . NodeID ,
} ,
CollectionID : collectionID ,
} )
if err = VerifyResponse ( dresp , err ) ; err != nil {
2021-07-28 11:43:22 +08:00
log . Error ( "get collection info from master failed" ,
2021-06-07 09:47:36 +08:00
zap . Int64 ( "collectionID" , collectionID ) ,
zap . Error ( err ) )
resp . Status . Reason = err . Error ( )
return resp , nil
}
channels := dresp . GetVirtualChannelNames ( )
2021-10-14 15:44:34 +08:00
channelInfos := make ( [ ] * datapb . VchannelInfo , 0 , len ( channels ) )
2021-06-07 09:47:36 +08:00
for _ , c := range channels {
2021-10-28 14:30:26 +08:00
channelInfo := s . GetVChanPositions ( c , collectionID , true )
2021-10-14 15:44:34 +08:00
channelInfos = append ( channelInfos , channelInfo )
2021-06-07 09:47:36 +08:00
}
resp . Binlogs = binlogs
resp . Channels = channelInfos
resp . Status . ErrorCode = commonpb . ErrorCode_Success
return resp , nil
}
2021-07-02 11:16:20 +08:00
2021-09-06 11:12:42 +08:00
// GetFlushedSegments returns all segment matches provided criterion and in State Flushed
// If requested partition id < 0, ignores the partition id filter
2021-07-02 11:16:20 +08:00
func ( s * Server ) GetFlushedSegments ( ctx context . Context , req * datapb . GetFlushedSegmentsRequest ) ( * datapb . GetFlushedSegmentsResponse , error ) {
2021-09-06 11:12:42 +08:00
resp := & datapb . GetFlushedSegmentsResponse {
Status : & commonpb . Status {
ErrorCode : commonpb . ErrorCode_UnexpectedError ,
} ,
}
2021-07-02 11:16:20 +08:00
collectionID := req . GetCollectionID ( )
partitionID := req . GetPartitionID ( )
2021-09-06 11:12:42 +08:00
log . Debug ( "GetFlushedSegment" ,
zap . Int64 ( "collectionID" , collectionID ) ,
zap . Int64 ( "partitionID" , partitionID ) )
if s . isClosed ( ) {
resp . Status . Reason = serverNotServingErrMsg
return resp , nil
}
2021-07-02 11:16:20 +08:00
var segmentIDs [ ] UniqueID
if partitionID < 0 {
2021-11-05 22:25:00 +08:00
segmentIDs = s . meta . GetSegmentsIDOfCollection ( collectionID )
2021-07-02 11:16:20 +08:00
} else {
2021-11-05 22:25:00 +08:00
segmentIDs = s . meta . GetSegmentsIDOfPartition ( collectionID , partitionID )
2021-07-02 11:16:20 +08:00
}
ret := make ( [ ] UniqueID , 0 , len ( segmentIDs ) )
for _ , id := range segmentIDs {
2021-07-07 14:02:01 +08:00
s := s . meta . GetSegment ( id )
2021-11-05 22:25:00 +08:00
if s != nil && s . GetState ( ) != commonpb . SegmentState_Flushed {
2021-07-02 11:16:20 +08:00
continue
}
2021-11-05 22:25:00 +08:00
// if this segment == nil, we assume this segment has been compacted and flushed
2021-07-02 11:16:20 +08:00
ret = append ( ret , id )
}
2021-09-06 11:12:42 +08:00
resp . Segments = ret
resp . Status . ErrorCode = commonpb . ErrorCode_Success
return resp , nil
2021-07-02 11:16:20 +08:00
}
2021-09-01 10:13:15 +08:00
2021-09-06 11:12:42 +08:00
// GetMetrics returns DataCoord metrics info
// it may include SystemMetrics, Topology metrics, etc.
2021-09-01 10:13:15 +08:00
func ( s * Server ) GetMetrics ( ctx context . Context , req * milvuspb . GetMetricsRequest ) ( * milvuspb . GetMetricsResponse , error ) {
log . Debug ( "DataCoord.GetMetrics" ,
zap . Int64 ( "node_id" , Params . NodeID ) ,
zap . String ( "req" , req . Request ) )
if s . isClosed ( ) {
log . Warn ( "DataCoord.GetMetrics failed" ,
zap . Int64 ( "node_id" , Params . NodeID ) ,
zap . String ( "req" , req . Request ) ,
zap . Error ( errDataCoordIsUnhealthy ( Params . NodeID ) ) )
return & milvuspb . GetMetricsResponse {
Status : & commonpb . Status {
ErrorCode : commonpb . ErrorCode_UnexpectedError ,
Reason : msgDataCoordIsUnhealthy ( Params . NodeID ) ,
} ,
Response : "" ,
} , nil
}
metricType , err := metricsinfo . ParseMetricType ( req . Request )
if err != nil {
log . Warn ( "DataCoord.GetMetrics failed to parse metric type" ,
zap . Int64 ( "node_id" , Params . NodeID ) ,
zap . String ( "req" , req . Request ) ,
zap . Error ( err ) )
return & milvuspb . GetMetricsResponse {
Status : & commonpb . Status {
ErrorCode : commonpb . ErrorCode_UnexpectedError ,
Reason : err . Error ( ) ,
} ,
Response : "" ,
} , nil
}
log . Debug ( "DataCoord.GetMetrics" ,
zap . String ( "metric_type" , metricType ) )
if metricType == metricsinfo . SystemInfoMetrics {
2021-09-03 17:15:26 +08:00
ret , err := s . metricsCacheManager . GetSystemInfoMetrics ( )
if err == nil && ret != nil {
return ret , nil
}
log . Debug ( "failed to get system info metrics from cache, recompute instead" ,
zap . Error ( err ) )
2021-09-01 10:13:15 +08:00
metrics , err := s . getSystemInfoMetrics ( ctx , req )
log . Debug ( "DataCoord.GetMetrics" ,
zap . Int64 ( "node_id" , Params . NodeID ) ,
zap . String ( "req" , req . Request ) ,
zap . String ( "metric_type" , metricType ) ,
zap . Any ( "metrics" , metrics ) , // TODO(dragondriver): necessary? may be very large
zap . Error ( err ) )
2021-09-03 17:15:26 +08:00
s . metricsCacheManager . UpdateSystemInfoMetrics ( metrics )
2021-09-01 10:13:15 +08:00
return metrics , err
}
log . Debug ( "DataCoord.GetMetrics failed, request metric type is not implemented yet" ,
zap . Int64 ( "node_id" , Params . NodeID ) ,
zap . String ( "req" , req . Request ) ,
zap . String ( "metric_type" , metricType ) )
return & milvuspb . GetMetricsResponse {
Status : & commonpb . Status {
ErrorCode : commonpb . ErrorCode_UnexpectedError ,
Reason : metricsinfo . MsgUnimplementedMetric ,
} ,
Response : "" ,
} , nil
}
2021-11-05 22:25:00 +08:00
2021-11-10 23:57:38 +08:00
// CompleteCompaction completes a compaction with the result
2021-11-05 22:25:00 +08:00
func ( s * Server ) CompleteCompaction ( ctx context . Context , req * datapb . CompactionResult ) ( * commonpb . Status , error ) {
log . Debug ( "receive complete compaction request" , zap . Int64 ( "planID" , req . PlanID ) , zap . Int64 ( "segmentID" , req . GetSegmentID ( ) ) )
resp := & commonpb . Status {
ErrorCode : commonpb . ErrorCode_UnexpectedError ,
}
if s . isClosed ( ) {
log . Warn ( "failed to complete compaction" , zap . Int64 ( "planID" , req . PlanID ) ,
zap . Error ( errDataCoordIsUnhealthy ( Params . NodeID ) ) )
resp . Reason = msgDataCoordIsUnhealthy ( Params . NodeID )
return resp , nil
}
if ! Params . EnableCompaction {
resp . Reason = "compaction disabled"
return resp , nil
}
if err := s . compactionHandler . completeCompaction ( req ) ; err != nil {
log . Error ( "failed to complete compaction" , zap . Int64 ( "planID" , req . PlanID ) , zap . Error ( err ) )
resp . Reason = err . Error ( )
return resp , nil
}
log . Debug ( "success to complete compaction" , zap . Int64 ( "planID" , req . PlanID ) )
resp . ErrorCode = commonpb . ErrorCode_Success
return resp , nil
}
2021-11-09 14:47:02 +08:00
func ( s * Server ) ManualCompaction ( ctx context . Context , req * milvuspb . ManualCompactionRequest ) ( * milvuspb . ManualCompactionResponse , error ) {
2021-11-05 22:25:00 +08:00
log . Debug ( "receive manual compaction" , zap . Int64 ( "collectionID" , req . GetCollectionID ( ) ) )
2021-11-09 14:47:02 +08:00
resp := & milvuspb . ManualCompactionResponse {
2021-11-05 22:25:00 +08:00
Status : & commonpb . Status {
ErrorCode : commonpb . ErrorCode_UnexpectedError ,
} ,
}
if s . isClosed ( ) {
log . Warn ( "failed to execute manual compaction" , zap . Int64 ( "collectionID" , req . GetCollectionID ( ) ) ,
zap . Error ( errDataCoordIsUnhealthy ( Params . NodeID ) ) )
resp . Status . Reason = msgDataCoordIsUnhealthy ( Params . NodeID )
return resp , nil
}
if ! Params . EnableCompaction {
resp . Status . Reason = "compaction disabled"
return resp , nil
}
id , err := s . compactionTrigger . forceTriggerCompaction ( req . CollectionID , & timetravel { req . Timetravel } )
if err != nil {
log . Error ( "failed to trigger manual compaction" , zap . Int64 ( "collectionID" , req . GetCollectionID ( ) ) , zap . Error ( err ) )
resp . Status . Reason = err . Error ( )
return resp , nil
}
log . Debug ( "success to trigger manual compaction" , zap . Int64 ( "collectionID" , req . GetCollectionID ( ) ) , zap . Int64 ( "compactionID" , id ) )
resp . Status . ErrorCode = commonpb . ErrorCode_Success
resp . CompactionID = id
return resp , nil
}
2021-11-09 14:47:02 +08:00
func ( s * Server ) GetCompactionState ( ctx context . Context , req * milvuspb . GetCompactionStateRequest ) ( * milvuspb . GetCompactionStateResponse , error ) {
2021-11-05 22:25:00 +08:00
log . Debug ( "receive get compaction state request" , zap . Int64 ( "compactionID" , req . GetCompactionID ( ) ) )
2021-11-09 14:47:02 +08:00
resp := & milvuspb . GetCompactionStateResponse {
2021-11-05 22:25:00 +08:00
Status : & commonpb . Status {
ErrorCode : commonpb . ErrorCode_UnexpectedError ,
} ,
}
if s . isClosed ( ) {
log . Warn ( "failed to get compaction state" , zap . Int64 ( "compactionID" , req . GetCompactionID ( ) ) ,
zap . Error ( errDataCoordIsUnhealthy ( Params . NodeID ) ) )
resp . Status . Reason = msgDataCoordIsUnhealthy ( Params . NodeID )
return resp , nil
}
if ! Params . EnableCompaction {
resp . Status . Reason = "compaction disabled"
return resp , nil
}
2021-11-09 14:47:02 +08:00
tasks := s . compactionHandler . getCompactionTasksBySignalID ( req . GetCompactionID ( ) )
state , executingCnt , completedCnt , timeoutCnt := getCompactionState ( tasks )
resp . State = state
resp . ExecutingPlanNo = int64 ( executingCnt )
resp . CompletedPlanNo = int64 ( completedCnt )
resp . TimeoutPlanNo = int64 ( timeoutCnt )
resp . Status . ErrorCode = commonpb . ErrorCode_Success
return resp , nil
}
func ( s * Server ) GetCompactionStateWithPlans ( ctx context . Context , req * milvuspb . GetCompactionPlansRequest ) ( * milvuspb . GetCompactionPlansResponse , error ) {
log . Debug ( "received GetCompactionStateWithPlans request" , zap . Int64 ( "compactionID" , req . GetCompactionID ( ) ) )
resp := & milvuspb . GetCompactionPlansResponse {
Status : & commonpb . Status { ErrorCode : commonpb . ErrorCode_UnexpectedError } ,
}
if s . isClosed ( ) {
log . Warn ( "failed to get compaction state with plans" , zap . Int64 ( "compactionID" , req . GetCompactionID ( ) ) , zap . Error ( errDataCoordIsUnhealthy ( Params . NodeID ) ) )
resp . Status . Reason = msgDataCoordIsUnhealthy ( Params . NodeID )
return resp , nil
}
if ! Params . EnableCompaction {
resp . Status . Reason = "compaction disabled"
return resp , nil
2021-11-05 22:25:00 +08:00
}
2021-11-09 14:47:02 +08:00
tasks := s . compactionHandler . getCompactionTasksBySignalID ( req . GetCompactionID ( ) )
for _ , task := range tasks {
resp . MergeInfos = append ( resp . MergeInfos , getCompactionMergeInfo ( task ) )
}
state , _ , _ , _ := getCompactionState ( tasks )
2021-11-05 22:25:00 +08:00
resp . Status . ErrorCode = commonpb . ErrorCode_Success
2021-11-09 14:47:02 +08:00
resp . State = state
2021-11-05 22:25:00 +08:00
return resp , nil
}
2021-11-09 14:47:02 +08:00
// getCompactionMergeInfo summarizes a compaction task as a merge: Sources are
// the segment IDs listed in the task's plan, and Target is the segment ID of
// the task's result, or -1 while the task has no result.
func getCompactionMergeInfo(task *compactionTask) *milvuspb.CompactionMergeInfo {
	info := &milvuspb.CompactionMergeInfo{Target: -1}

	for _, segBinlogs := range task.plan.GetSegmentBinlogs() {
		info.Sources = append(info.Sources, segBinlogs.GetSegmentID())
	}

	if res := task.result; res != nil {
		info.Target = res.GetSegmentID()
	}
	return info
}
// getCompactionState aggregates the states of the given compaction tasks. It
// counts how many tasks are executing, completed and timed out (tasks in any
// other state are not counted) and derives the overall state: Executing while
// at least one task is executing, Completed otherwise. An empty task list
// therefore yields Completed.
func getCompactionState(tasks []*compactionTask) (state commonpb.CompactionState, executingCnt, completedCnt, timeoutCnt int) {
	for _, task := range tasks {
		if task.state == executing {
			executingCnt++
		} else if task.state == completed {
			completedCnt++
		} else if task.state == timeout {
			timeoutCnt++
		}
	}

	state = commonpb.CompactionState_Completed
	if executingCnt != 0 {
		state = commonpb.CompactionState_Executing
	}
	return state, executingCnt, completedCnt, timeoutCnt
}