2021-04-19 13:47:10 +08:00
|
|
|
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
|
|
|
|
//
|
|
|
|
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
|
|
|
|
// with the License. You may obtain a copy of the License at
|
|
|
|
//
|
|
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
//
|
|
|
|
// Unless required by applicable law or agreed to in writing, software distributed under the License
|
|
|
|
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
|
|
|
|
// or implied. See the License for the specific language governing permissions and limitations under the License.
|
|
|
|
|
2021-02-06 11:35:35 +08:00
|
|
|
package querynode
|
|
|
|
|
|
|
|
import (
|
|
|
|
"context"
|
2021-03-05 10:15:27 +08:00
|
|
|
"errors"
|
2021-04-07 18:29:19 +08:00
|
|
|
"fmt"
|
2021-03-22 16:36:10 +08:00
|
|
|
"strconv"
|
2021-03-05 10:15:27 +08:00
|
|
|
|
2021-04-07 18:29:19 +08:00
|
|
|
"go.uber.org/zap"
|
|
|
|
|
2021-02-06 11:35:35 +08:00
|
|
|
"github.com/zilliztech/milvus-distributed/internal/kv"
|
|
|
|
minioKV "github.com/zilliztech/milvus-distributed/internal/kv/minio"
|
2021-04-07 18:29:19 +08:00
|
|
|
"github.com/zilliztech/milvus-distributed/internal/log"
|
2021-02-22 16:34:15 +08:00
|
|
|
"github.com/zilliztech/milvus-distributed/internal/proto/commonpb"
|
2021-02-06 11:35:35 +08:00
|
|
|
"github.com/zilliztech/milvus-distributed/internal/proto/datapb"
|
2021-03-12 14:22:09 +08:00
|
|
|
"github.com/zilliztech/milvus-distributed/internal/proto/internalpb"
|
2021-02-06 11:35:35 +08:00
|
|
|
"github.com/zilliztech/milvus-distributed/internal/storage"
|
2021-03-22 16:36:10 +08:00
|
|
|
"github.com/zilliztech/milvus-distributed/internal/types"
|
2021-02-06 11:35:35 +08:00
|
|
|
)
|
|
|
|
|
|
|
|
// segmentLoader is only responsible for loading the field data from binlog
|
|
|
|
type segmentLoader struct {
|
2021-03-05 16:52:45 +08:00
|
|
|
replica ReplicaInterface
|
2021-02-06 11:35:35 +08:00
|
|
|
|
2021-03-08 10:09:48 +08:00
|
|
|
dataService types.DataService
|
2021-02-06 11:35:35 +08:00
|
|
|
|
2021-04-12 18:09:28 +08:00
|
|
|
kv kv.BaseKV // minio kv
|
2021-02-06 11:35:35 +08:00
|
|
|
|
|
|
|
indexLoader *indexLoader
|
|
|
|
}
|
|
|
|
|
2021-03-12 14:22:09 +08:00
|
|
|
func (loader *segmentLoader) getInsertBinlogPaths(segmentID UniqueID) ([]*internalpb.StringList, []int64, error) {
|
2021-02-26 17:44:24 +08:00
|
|
|
ctx := context.TODO()
|
2021-03-08 10:09:48 +08:00
|
|
|
if loader.dataService == nil {
|
2021-02-06 11:35:35 +08:00
|
|
|
return nil, nil, errors.New("null data service client")
|
|
|
|
}
|
|
|
|
|
2021-03-12 14:22:09 +08:00
|
|
|
insertBinlogPathRequest := &datapb.GetInsertBinlogPathsRequest{
|
2021-02-06 11:35:35 +08:00
|
|
|
SegmentID: segmentID,
|
|
|
|
}
|
|
|
|
|
2021-03-08 10:09:48 +08:00
|
|
|
pathResponse, err := loader.dataService.GetInsertBinlogPaths(ctx, insertBinlogPathRequest)
|
2021-03-10 22:06:22 +08:00
|
|
|
if err != nil || pathResponse.Status.ErrorCode != commonpb.ErrorCode_Success {
|
2021-02-06 11:35:35 +08:00
|
|
|
return nil, nil, err
|
|
|
|
}
|
|
|
|
|
2021-02-22 16:34:15 +08:00
|
|
|
if len(pathResponse.FieldIDs) != len(pathResponse.Paths) || len(pathResponse.FieldIDs) <= 0 {
|
2021-03-12 14:22:09 +08:00
|
|
|
return nil, nil, errors.New("illegal GetInsertBinlogPathsResponse")
|
2021-02-06 11:35:35 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
return pathResponse.Paths, pathResponse.FieldIDs, nil
|
|
|
|
}
|
|
|
|
|
2021-03-12 14:22:09 +08:00
|
|
|
func (loader *segmentLoader) GetSegmentStates(segmentID UniqueID) (*datapb.GetSegmentStatesResponse, error) {
|
2021-02-26 17:44:24 +08:00
|
|
|
ctx := context.TODO()
|
2021-03-08 10:09:48 +08:00
|
|
|
if loader.dataService == nil {
|
2021-02-23 14:13:33 +08:00
|
|
|
return nil, errors.New("null data service client")
|
|
|
|
}
|
|
|
|
|
2021-03-12 14:22:09 +08:00
|
|
|
segmentStatesRequest := &datapb.GetSegmentStatesRequest{
|
2021-02-23 14:13:33 +08:00
|
|
|
SegmentIDs: []int64{segmentID},
|
|
|
|
}
|
2021-03-08 10:09:48 +08:00
|
|
|
statesResponse, err := loader.dataService.GetSegmentStates(ctx, segmentStatesRequest)
|
2021-03-10 22:06:22 +08:00
|
|
|
if err != nil || statesResponse.Status.ErrorCode != commonpb.ErrorCode_Success {
|
2021-02-23 14:13:33 +08:00
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
if len(statesResponse.States) != 1 {
|
|
|
|
return nil, errors.New("segment states' len should be 1")
|
|
|
|
}
|
|
|
|
|
|
|
|
return statesResponse, nil
|
|
|
|
}
|
|
|
|
|
2021-02-06 11:35:35 +08:00
|
|
|
func (loader *segmentLoader) filterOutVectorFields(fieldIDs []int64, vectorFields []int64) []int64 {
|
|
|
|
containsFunc := func(s []int64, e int64) bool {
|
|
|
|
for _, a := range s {
|
|
|
|
if a == e {
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
targetFields := make([]int64, 0)
|
|
|
|
for _, id := range fieldIDs {
|
|
|
|
if !containsFunc(vectorFields, id) {
|
|
|
|
targetFields = append(targetFields, id)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return targetFields
|
|
|
|
}
|
|
|
|
|
2021-03-12 14:22:09 +08:00
|
|
|
func (loader *segmentLoader) checkTargetFields(paths []*internalpb.StringList, srcFieldIDs []int64, dstFieldIDs []int64) (map[int64]*internalpb.StringList, error) {
|
|
|
|
targetFields := make(map[int64]*internalpb.StringList)
|
2021-02-06 11:35:35 +08:00
|
|
|
|
|
|
|
containsFunc := func(s []int64, e int64) bool {
|
|
|
|
for _, a := range s {
|
|
|
|
if a == e {
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
2021-02-26 11:15:44 +08:00
|
|
|
for _, fieldID := range dstFieldIDs {
|
2021-02-22 16:34:15 +08:00
|
|
|
if !containsFunc(srcFieldIDs, fieldID) {
|
|
|
|
return nil, errors.New("uncompleted fields")
|
2021-02-06 11:35:35 +08:00
|
|
|
}
|
2021-02-26 11:15:44 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
for i := range srcFieldIDs {
|
|
|
|
targetFields[srcFieldIDs[i]] = paths[i]
|
2021-02-06 11:35:35 +08:00
|
|
|
}
|
|
|
|
|
2021-02-22 16:34:15 +08:00
|
|
|
return targetFields, nil
|
2021-02-06 11:35:35 +08:00
|
|
|
}
|
|
|
|
|
2021-03-12 14:22:09 +08:00
|
|
|
func (loader *segmentLoader) loadSegmentFieldsData(segment *Segment, targetFields map[int64]*internalpb.StringList) error {
|
2021-03-22 16:36:10 +08:00
|
|
|
iCodec := storage.InsertCodec{}
|
|
|
|
defer iCodec.Close()
|
2021-02-06 11:35:35 +08:00
|
|
|
for id, p := range targetFields {
|
|
|
|
if id == timestampFieldID {
|
|
|
|
// seg core doesn't need timestamp field
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
paths := p.Values
|
|
|
|
blobs := make([]*storage.Blob, 0)
|
2021-04-07 18:29:19 +08:00
|
|
|
log.Debug("loadSegmentFieldsData", zap.Int64("segmentID", segment.segmentID), zap.String("path", fmt.Sprintln(paths)))
|
2021-02-06 11:35:35 +08:00
|
|
|
for _, path := range paths {
|
|
|
|
binLog, err := loader.kv.Load(path)
|
|
|
|
if err != nil {
|
|
|
|
// TODO: return or continue?
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
blobs = append(blobs, &storage.Blob{
|
|
|
|
Key: strconv.FormatInt(id, 10), // TODO: key???
|
|
|
|
Value: []byte(binLog),
|
|
|
|
})
|
|
|
|
}
|
2021-03-22 16:36:10 +08:00
|
|
|
_, _, insertData, err := iCodec.Deserialize(blobs)
|
2021-02-06 11:35:35 +08:00
|
|
|
if err != nil {
|
|
|
|
// TODO: return or continue
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
if len(insertData.Data) != 1 {
|
|
|
|
return errors.New("we expect only one field in deserialized insert data")
|
|
|
|
}
|
|
|
|
|
|
|
|
for _, value := range insertData.Data {
|
|
|
|
var numRows int
|
|
|
|
var data interface{}
|
|
|
|
|
|
|
|
switch fieldData := value.(type) {
|
|
|
|
case *storage.BoolFieldData:
|
|
|
|
numRows = fieldData.NumRows
|
|
|
|
data = fieldData.Data
|
|
|
|
case *storage.Int8FieldData:
|
|
|
|
numRows = fieldData.NumRows
|
|
|
|
data = fieldData.Data
|
|
|
|
case *storage.Int16FieldData:
|
|
|
|
numRows = fieldData.NumRows
|
|
|
|
data = fieldData.Data
|
|
|
|
case *storage.Int32FieldData:
|
|
|
|
numRows = fieldData.NumRows
|
|
|
|
data = fieldData.Data
|
|
|
|
case *storage.Int64FieldData:
|
|
|
|
numRows = fieldData.NumRows
|
|
|
|
data = fieldData.Data
|
|
|
|
case *storage.FloatFieldData:
|
|
|
|
numRows = fieldData.NumRows
|
|
|
|
data = fieldData.Data
|
|
|
|
case *storage.DoubleFieldData:
|
|
|
|
numRows = fieldData.NumRows
|
|
|
|
data = fieldData.Data
|
|
|
|
case storage.StringFieldData:
|
|
|
|
numRows = fieldData.NumRows
|
|
|
|
data = fieldData.Data
|
|
|
|
case *storage.FloatVectorFieldData:
|
|
|
|
numRows = fieldData.NumRows
|
|
|
|
data = fieldData.Data
|
|
|
|
case *storage.BinaryVectorFieldData:
|
|
|
|
numRows = fieldData.NumRows
|
|
|
|
data = fieldData.Data
|
|
|
|
default:
|
|
|
|
return errors.New("unexpected field data type")
|
|
|
|
}
|
|
|
|
err = segment.segmentLoadFieldData(id, numRows, data)
|
|
|
|
if err != nil {
|
|
|
|
// TODO: return or continue?
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2021-03-22 16:36:10 +08:00
|
|
|
func newSegmentLoader(ctx context.Context, masterService types.MasterService, indexService types.IndexService, dataService types.DataService, replica ReplicaInterface) *segmentLoader {
|
2021-02-06 11:35:35 +08:00
|
|
|
option := &minioKV.Option{
|
|
|
|
Address: Params.MinioEndPoint,
|
|
|
|
AccessKeyID: Params.MinioAccessKeyID,
|
|
|
|
SecretAccessKeyID: Params.MinioSecretAccessKey,
|
|
|
|
UseSSL: Params.MinioUseSSLStr,
|
|
|
|
CreateBucket: true,
|
|
|
|
BucketName: Params.MinioBucketName,
|
|
|
|
}
|
|
|
|
|
|
|
|
client, err := minioKV.NewMinIOKV(ctx, option)
|
|
|
|
if err != nil {
|
|
|
|
panic(err)
|
|
|
|
}
|
|
|
|
|
2021-03-08 10:09:48 +08:00
|
|
|
iLoader := newIndexLoader(ctx, masterService, indexService, replica)
|
2021-02-06 11:35:35 +08:00
|
|
|
return &segmentLoader{
|
|
|
|
replica: replica,
|
|
|
|
|
2021-03-08 10:09:48 +08:00
|
|
|
dataService: dataService,
|
2021-02-06 11:35:35 +08:00
|
|
|
|
2021-03-22 16:36:10 +08:00
|
|
|
kv: client,
|
2021-02-06 11:35:35 +08:00
|
|
|
|
|
|
|
indexLoader: iLoader,
|
|
|
|
}
|
|
|
|
}
|