2021-04-19 13:47:10 +08:00
|
|
|
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
|
|
|
|
//
|
|
|
|
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
|
|
|
|
// with the License. You may obtain a copy of the License at
|
|
|
|
//
|
|
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
//
|
|
|
|
// Unless required by applicable law or agreed to in writing, software distributed under the License
|
|
|
|
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
|
|
|
|
// or implied. See the License for the specific language governing permissions and limitations under the License.
|
|
|
|
|
2021-02-06 11:35:35 +08:00
|
|
|
package querynode
|
|
|
|
|
|
|
|
import (
|
|
|
|
"context"
|
2021-03-05 10:15:27 +08:00
|
|
|
"errors"
|
2021-04-07 18:29:19 +08:00
|
|
|
"fmt"
|
2021-03-05 10:15:27 +08:00
|
|
|
|
2021-06-19 11:45:09 +08:00
|
|
|
"github.com/golang/protobuf/proto"
|
2021-04-07 18:29:19 +08:00
|
|
|
"go.uber.org/zap"
|
|
|
|
|
2021-04-22 14:45:57 +08:00
|
|
|
"github.com/milvus-io/milvus/internal/kv"
|
2021-06-19 11:45:09 +08:00
|
|
|
etcdkv "github.com/milvus-io/milvus/internal/kv/etcd"
|
2021-04-22 14:45:57 +08:00
|
|
|
minioKV "github.com/milvus-io/milvus/internal/kv/minio"
|
|
|
|
"github.com/milvus-io/milvus/internal/log"
|
|
|
|
"github.com/milvus-io/milvus/internal/proto/datapb"
|
2021-07-16 17:19:55 +08:00
|
|
|
"github.com/milvus-io/milvus/internal/proto/querypb"
|
2021-04-22 14:45:57 +08:00
|
|
|
"github.com/milvus-io/milvus/internal/storage"
|
|
|
|
"github.com/milvus-io/milvus/internal/types"
|
2021-07-16 17:19:55 +08:00
|
|
|
"github.com/milvus-io/milvus/internal/util/funcutil"
|
2021-02-06 11:35:35 +08:00
|
|
|
)
|
|
|
|
|
2021-06-19 11:45:09 +08:00
|
|
|
const (
	// queryCoordSegmentMetaPrefix is the etcd key prefix under which QueryCoord
	// stores segment meta; loadSegment reads from it when running on service.
	queryCoordSegmentMetaPrefix = "queryCoord-segmentMeta"
	// queryNodeSegmentMetaPrefix is the etcd key prefix under which this query
	// node writes back the segment meta after a successful load.
	queryNodeSegmentMetaPrefix = "queryNode-segmentMeta"
)
|
|
|
|
|
2021-02-06 11:35:35 +08:00
|
|
|
// segmentLoader is only responsible for loading the field data from binlog
|
|
|
|
type segmentLoader struct {
	// historicalReplica holds the collections/partitions/segments this loader
	// populates with sealed-segment data.
	historicalReplica ReplicaInterface

	// dataCoord is the DataCoord client (currently only referenced by the
	// commented-out GetSegmentStates helper below).
	dataCoord types.DataCoord

	// minioKV is the MinIO-backed KV store used to download binlog files.
	minioKV kv.BaseKV
	// etcdKV is used to read QueryCoord segment meta and write back the
	// QueryNode segment meta.
	etcdKV *etcdkv.EtcdKV

	// indexLoader fetches index info/files for vector fields and reports
	// query-node stats.
	indexLoader *indexLoader
}
|
|
|
|
|
2021-07-16 17:19:55 +08:00
|
|
|
// loadSegmentOfConditionHandOff handles a load request triggered by segment
// hand-off. Not implemented yet: it always returns an error and ignores req.
func (loader *segmentLoader) loadSegmentOfConditionHandOff(req *querypb.LoadSegmentsRequest) error {
	return errors.New("TODO: implement hand off")
}
|
|
|
|
|
2021-07-16 17:19:55 +08:00
|
|
|
// loadSegmentOfConditionLoadBalance handles a load request triggered by load
// balancing. It passes onService=false, so loadSegment skips the etcd
// segment-meta update.
func (loader *segmentLoader) loadSegmentOfConditionLoadBalance(req *querypb.LoadSegmentsRequest) error {
	return loader.loadSegment(req, false)
}
|
|
|
|
|
2021-07-16 17:19:55 +08:00
|
|
|
// loadSegmentOfConditionGRPC handles a load request received directly over
// gRPC. It passes onService=true, so loadSegment also rewrites the segment
// meta in etcd under the query-node prefix.
func (loader *segmentLoader) loadSegmentOfConditionGRPC(req *querypb.LoadSegmentsRequest) error {
	return loader.loadSegment(req, true)
}
|
|
|
|
|
2021-07-16 17:19:55 +08:00
|
|
|
// loadSegmentOfConditionNodeDown handles a load request issued because another
// query node went down. It passes onService=true, so loadSegment also rewrites
// the segment meta in etcd under the query-node prefix.
func (loader *segmentLoader) loadSegmentOfConditionNodeDown(req *querypb.LoadSegmentsRequest) error {
	return loader.loadSegment(req, true)
}
|
|
|
|
|
2021-07-16 17:19:55 +08:00
|
|
|
// loadSegment loads every sealed segment described in req into the historical
// replica. Missing collections/partitions are created from req.Schema first;
// a per-segment failure after that point destroys the partially built segment
// and moves on to the next one, so one bad segment does not abort the batch.
// When onService is true, the QueryCoord segment meta is copied from etcd to
// the query-node prefix with its state forced to sealed. Finally, query-node
// stats are reported via the index loader.
func (loader *segmentLoader) loadSegment(req *querypb.LoadSegmentsRequest, onService bool) error {
	// no segment needs to load, return
	if len(req.Infos) == 0 {
		return nil
	}

	// start to load
	for _, info := range req.Infos {
		segmentID := info.SegmentID
		partitionID := info.PartitionID
		collectionID := info.CollectionID

		// init replica: make sure the target collection and partition exist
		// in the historical replica before attaching the segment
		hasCollectionInHistorical := loader.historicalReplica.hasCollection(collectionID)
		hasPartitionInHistorical := loader.historicalReplica.hasPartition(partitionID)
		if !hasCollectionInHistorical {
			err := loader.historicalReplica.addCollection(collectionID, req.Schema)
			if err != nil {
				// replica setup failures abort the whole request, unlike the
				// per-segment failures below
				return err
			}
		}
		if !hasPartitionInHistorical {
			err := loader.historicalReplica.addPartition(collectionID, partitionID)
			if err != nil {
				return err
			}
		}

		collection, err := loader.historicalReplica.getCollectionByID(collectionID)
		if err != nil {
			log.Warn(err.Error())
			continue
		}
		segment := newSegment(collection, segmentID, partitionID, collectionID, "", segmentTypeSealed, onService)
		err = loader.loadSegmentInternal(collectionID, segment, info)
		if err != nil {
			// free the C-side segment before skipping it
			deleteSegment(segment)
			log.Error(err.Error())
			continue
		}
		err = loader.historicalReplica.setSegment(segment)
		if err != nil {
			deleteSegment(segment)
			log.Error(err.Error())
			continue
		}
		if onService {
			// move the segment meta from the QueryCoord prefix to the
			// QueryNode prefix, marking the segment as sealed
			key := fmt.Sprintf("%s/%d", queryCoordSegmentMetaPrefix, segmentID)
			value, err := loader.etcdKV.Load(key)
			if err != nil {
				deleteSegment(segment)
				log.Error("error when load segment info from etcd", zap.Any("error", err.Error()))
				continue
			}
			segmentInfo := &querypb.SegmentInfo{}
			err = proto.UnmarshalText(value, segmentInfo)
			if err != nil {
				deleteSegment(segment)
				log.Error("error when unmarshal segment info from etcd", zap.Any("error", err.Error()))
				continue
			}
			segmentInfo.SegmentState = querypb.SegmentState_sealed
			newKey := fmt.Sprintf("%s/%d", queryNodeSegmentMetaPrefix, segmentID)
			err = loader.etcdKV.Save(newKey, proto.MarshalTextString(segmentInfo))
			if err != nil {
				// NOTE(review): the segment stays registered in the replica
				// here (no continue), only the in-progress one is destroyed —
				// confirm this asymmetry with the etcd-save failure is intended
				deleteSegment(segment)
				log.Error("error when update segment info to etcd", zap.Any("error", err.Error()))
			}
		}
	}

	// sendQueryNodeStats
	return loader.indexLoader.sendQueryNodeStats()
}
|
|
|
|
|
2021-07-16 17:19:55 +08:00
|
|
|
func (loader *segmentLoader) loadSegmentInternal(collectionID UniqueID, segment *Segment, segmentLoadInfo *querypb.SegmentLoadInfo) error {
|
2021-06-15 12:41:40 +08:00
|
|
|
vectorFieldIDs, err := loader.historicalReplica.getVecFieldIDsByCollectionID(collectionID)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
2021-02-06 11:35:35 +08:00
|
|
|
}
|
2021-07-16 17:19:55 +08:00
|
|
|
if len(vectorFieldIDs) <= 0 {
|
|
|
|
return fmt.Errorf("no vector field in collection %d", collectionID)
|
|
|
|
}
|
|
|
|
|
|
|
|
// add VectorFieldInfo for vector fields
|
|
|
|
for _, fieldBinlog := range segmentLoadInfo.BinlogPaths {
|
|
|
|
if funcutil.SliceContain(vectorFieldIDs, fieldBinlog.FieldID) {
|
|
|
|
vectorFieldInfo := newVectorFieldInfo(fieldBinlog)
|
|
|
|
segment.setVectorFieldInfo(fieldBinlog.FieldID, vectorFieldInfo)
|
|
|
|
}
|
|
|
|
}
|
2021-02-06 11:35:35 +08:00
|
|
|
|
2021-07-16 17:19:55 +08:00
|
|
|
indexedFieldIDs := make([]int64, 0)
|
2021-06-15 12:41:40 +08:00
|
|
|
for _, vecFieldID := range vectorFieldIDs {
|
|
|
|
err = loader.indexLoader.setIndexInfo(collectionID, segment, vecFieldID)
|
|
|
|
if err != nil {
|
|
|
|
log.Warn(err.Error())
|
|
|
|
continue
|
|
|
|
}
|
2021-07-16 17:19:55 +08:00
|
|
|
indexedFieldIDs = append(indexedFieldIDs, vecFieldID)
|
2021-02-06 11:35:35 +08:00
|
|
|
}
|
2021-07-16 17:19:55 +08:00
|
|
|
|
|
|
|
// we don't need to load raw data for indexed vector field
|
|
|
|
fieldBinlogs := loader.filterFieldBinlogs(segmentLoadInfo.BinlogPaths, indexedFieldIDs)
|
2021-02-06 11:35:35 +08:00
|
|
|
|
2021-06-15 12:41:40 +08:00
|
|
|
log.Debug("loading insert...")
|
2021-07-16 17:19:55 +08:00
|
|
|
err = loader.loadSegmentFieldsData(segment, fieldBinlogs)
|
2021-06-15 12:41:40 +08:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2021-07-16 17:19:55 +08:00
|
|
|
for _, id := range indexedFieldIDs {
|
2021-06-15 12:41:40 +08:00
|
|
|
log.Debug("loading index...")
|
|
|
|
err = loader.indexLoader.loadIndex(segment, id)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return nil
|
2021-02-06 11:35:35 +08:00
|
|
|
}
|
|
|
|
|
2021-07-13 14:16:00 +08:00
|
|
|
//func (loader *segmentLoader) GetSegmentStates(segmentID UniqueID) (*datapb.GetSegmentStatesResponse, error) {
|
|
|
|
// ctx := context.TODO()
|
|
|
|
// if loader.dataCoord == nil {
|
|
|
|
// return nil, errors.New("null data service client")
|
|
|
|
// }
|
|
|
|
//
|
|
|
|
// segmentStatesRequest := &datapb.GetSegmentStatesRequest{
|
|
|
|
// SegmentIDs: []int64{segmentID},
|
|
|
|
// }
|
|
|
|
// statesResponse, err := loader.dataCoord.GetSegmentStates(ctx, segmentStatesRequest)
|
|
|
|
// if err != nil || statesResponse.Status.ErrorCode != commonpb.ErrorCode_Success {
|
|
|
|
// return nil, err
|
|
|
|
// }
|
|
|
|
// if len(statesResponse.States) != 1 {
|
|
|
|
// return nil, errors.New("segment states' len should be 1")
|
|
|
|
// }
|
|
|
|
//
|
|
|
|
// return statesResponse, nil
|
|
|
|
//}
|
2021-02-23 14:13:33 +08:00
|
|
|
|
2021-07-16 17:19:55 +08:00
|
|
|
func (loader *segmentLoader) filterFieldBinlogs(fieldBinlogs []*datapb.FieldBinlog, skipFieldIDs []int64) []*datapb.FieldBinlog {
|
|
|
|
result := make([]*datapb.FieldBinlog, 0)
|
|
|
|
for _, fieldBinlog := range fieldBinlogs {
|
|
|
|
if !funcutil.SliceContain(skipFieldIDs, fieldBinlog.FieldID) {
|
|
|
|
result = append(result, fieldBinlog)
|
2021-02-06 11:35:35 +08:00
|
|
|
}
|
2021-02-26 11:15:44 +08:00
|
|
|
}
|
2021-07-16 17:19:55 +08:00
|
|
|
return result
|
2021-02-06 11:35:35 +08:00
|
|
|
}
|
|
|
|
|
2021-07-16 17:19:55 +08:00
|
|
|
func (loader *segmentLoader) loadSegmentFieldsData(segment *Segment, fieldBinlogs []*datapb.FieldBinlog) error {
|
2021-03-22 16:36:10 +08:00
|
|
|
iCodec := storage.InsertCodec{}
|
2021-06-15 12:41:40 +08:00
|
|
|
defer func() {
|
|
|
|
err := iCodec.Close()
|
|
|
|
if err != nil {
|
|
|
|
log.Error(err.Error())
|
|
|
|
}
|
|
|
|
}()
|
|
|
|
blobs := make([]*storage.Blob, 0)
|
2021-07-16 17:19:55 +08:00
|
|
|
for _, fb := range fieldBinlogs {
|
2021-06-15 12:41:40 +08:00
|
|
|
log.Debug("load segment fields data",
|
|
|
|
zap.Int64("segmentID", segment.segmentID),
|
2021-07-16 17:19:55 +08:00
|
|
|
zap.Any("fieldID", fb.FieldID),
|
|
|
|
zap.String("paths", fmt.Sprintln(fb.Binlogs)),
|
2021-06-15 12:41:40 +08:00
|
|
|
)
|
2021-07-16 17:19:55 +08:00
|
|
|
for _, path := range fb.Binlogs {
|
2021-06-29 13:24:15 +08:00
|
|
|
p := path
|
2021-06-19 11:45:09 +08:00
|
|
|
binLog, err := loader.minioKV.Load(path)
|
2021-02-06 11:35:35 +08:00
|
|
|
if err != nil {
|
|
|
|
// TODO: return or continue?
|
|
|
|
return err
|
|
|
|
}
|
2021-06-22 14:10:09 +08:00
|
|
|
blob := &storage.Blob{
|
2021-06-29 13:24:15 +08:00
|
|
|
Key: p,
|
2021-06-22 14:10:09 +08:00
|
|
|
Value: []byte(binLog),
|
|
|
|
}
|
|
|
|
blobs = append(blobs, blob)
|
2021-06-15 12:41:40 +08:00
|
|
|
}
|
2021-07-16 17:19:55 +08:00
|
|
|
// mark the flag that vector raw data will be loaded into memory
|
|
|
|
if vecFieldInfo, err := segment.getVectorFieldInfo(fb.FieldID); err == nil {
|
|
|
|
vecFieldInfo.setRawDataInMemory(true)
|
|
|
|
}
|
2021-06-15 12:41:40 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
_, _, insertData, err := iCodec.Deserialize(blobs)
|
|
|
|
if err != nil {
|
|
|
|
log.Error(err.Error())
|
|
|
|
return err
|
|
|
|
}
|
2021-07-16 17:19:55 +08:00
|
|
|
|
2021-06-15 12:41:40 +08:00
|
|
|
for fieldID, value := range insertData.Data {
|
|
|
|
var numRows int
|
|
|
|
var data interface{}
|
|
|
|
switch fieldData := value.(type) {
|
|
|
|
case *storage.BoolFieldData:
|
|
|
|
numRows = fieldData.NumRows
|
|
|
|
data = fieldData.Data
|
|
|
|
case *storage.Int8FieldData:
|
|
|
|
numRows = fieldData.NumRows
|
|
|
|
data = fieldData.Data
|
|
|
|
case *storage.Int16FieldData:
|
|
|
|
numRows = fieldData.NumRows
|
|
|
|
data = fieldData.Data
|
|
|
|
case *storage.Int32FieldData:
|
|
|
|
numRows = fieldData.NumRows
|
|
|
|
data = fieldData.Data
|
|
|
|
case *storage.Int64FieldData:
|
|
|
|
numRows = fieldData.NumRows
|
|
|
|
data = fieldData.Data
|
|
|
|
case *storage.FloatFieldData:
|
|
|
|
numRows = fieldData.NumRows
|
|
|
|
data = fieldData.Data
|
|
|
|
case *storage.DoubleFieldData:
|
|
|
|
numRows = fieldData.NumRows
|
|
|
|
data = fieldData.Data
|
|
|
|
case storage.StringFieldData:
|
|
|
|
numRows = fieldData.NumRows
|
|
|
|
data = fieldData.Data
|
|
|
|
case *storage.FloatVectorFieldData:
|
|
|
|
numRows = fieldData.NumRows
|
|
|
|
data = fieldData.Data
|
|
|
|
case *storage.BinaryVectorFieldData:
|
|
|
|
numRows = fieldData.NumRows
|
|
|
|
data = fieldData.Data
|
|
|
|
default:
|
|
|
|
return errors.New("unexpected field data type")
|
2021-02-06 11:35:35 +08:00
|
|
|
}
|
2021-06-15 12:41:40 +08:00
|
|
|
err = segment.segmentLoadFieldData(fieldID, numRows, data)
|
2021-02-06 11:35:35 +08:00
|
|
|
if err != nil {
|
2021-06-15 12:41:40 +08:00
|
|
|
// TODO: return or continue?
|
2021-02-06 11:35:35 +08:00
|
|
|
return err
|
|
|
|
}
|
|
|
|
}
|
2021-06-15 12:41:40 +08:00
|
|
|
|
2021-02-06 11:35:35 +08:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2021-07-17 15:17:30 +08:00
|
|
|
func (loader *segmentLoader) loadSegmentVectorFieldData(info *VectorFieldInfo) error {
|
2021-07-16 17:19:55 +08:00
|
|
|
iCodec := storage.InsertCodec{}
|
|
|
|
defer func() {
|
|
|
|
err := iCodec.Close()
|
|
|
|
if err != nil {
|
|
|
|
log.Error(err.Error())
|
|
|
|
}
|
|
|
|
}()
|
|
|
|
for _, path := range info.fieldBinlog.Binlogs {
|
|
|
|
if data := info.getRawData(path); data != nil {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
2021-07-17 15:17:30 +08:00
|
|
|
log.Debug("load vector raw data", zap.String("path", path))
|
|
|
|
|
2021-07-16 17:19:55 +08:00
|
|
|
binLog, err := loader.minioKV.Load(path)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
blob := &storage.Blob{
|
|
|
|
Key: path,
|
|
|
|
Value: []byte(binLog),
|
|
|
|
}
|
|
|
|
|
|
|
|
insertFieldData, err := iCodec.DeserializeOneVectorBinlog(blob)
|
|
|
|
if err != nil {
|
|
|
|
log.Error(err.Error())
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
// save raw data into segment.vectorFieldInfo
|
|
|
|
info.setRawData(path, insertFieldData.Data)
|
|
|
|
}
|
|
|
|
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2021-07-13 14:16:00 +08:00
|
|
|
func newSegmentLoader(ctx context.Context, rootCoord types.RootCoord, indexCoord types.IndexCoord, replica ReplicaInterface, etcdKV *etcdkv.EtcdKV) *segmentLoader {
|
2021-02-06 11:35:35 +08:00
|
|
|
option := &minioKV.Option{
|
|
|
|
Address: Params.MinioEndPoint,
|
|
|
|
AccessKeyID: Params.MinioAccessKeyID,
|
|
|
|
SecretAccessKeyID: Params.MinioSecretAccessKey,
|
|
|
|
UseSSL: Params.MinioUseSSLStr,
|
|
|
|
CreateBucket: true,
|
|
|
|
BucketName: Params.MinioBucketName,
|
|
|
|
}
|
|
|
|
|
|
|
|
client, err := minioKV.NewMinIOKV(ctx, option)
|
|
|
|
if err != nil {
|
|
|
|
panic(err)
|
|
|
|
}
|
|
|
|
|
2021-06-21 17:28:03 +08:00
|
|
|
iLoader := newIndexLoader(ctx, rootCoord, indexCoord, replica)
|
2021-02-06 11:35:35 +08:00
|
|
|
return &segmentLoader{
|
2021-06-15 12:41:40 +08:00
|
|
|
historicalReplica: replica,
|
2021-02-06 11:35:35 +08:00
|
|
|
|
2021-06-19 11:45:09 +08:00
|
|
|
minioKV: client,
|
|
|
|
etcdKV: etcdKV,
|
2021-02-06 11:35:35 +08:00
|
|
|
|
|
|
|
indexLoader: iLoader,
|
|
|
|
}
|
|
|
|
}
|