// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package querynode

import (
	"context"
	"errors"
	"fmt"
	"path"
	"strconv"
	"sync"
	"time"

	"go.uber.org/zap"

	"github.com/milvus-io/milvus/internal/common"
	"github.com/milvus-io/milvus/internal/kv"
	etcdkv "github.com/milvus-io/milvus/internal/kv/etcd"
	minioKV "github.com/milvus-io/milvus/internal/kv/minio"
	"github.com/milvus-io/milvus/internal/log"
	"github.com/milvus-io/milvus/internal/msgstream"
	"github.com/milvus-io/milvus/internal/proto/commonpb"
	"github.com/milvus-io/milvus/internal/proto/datapb"
	"github.com/milvus-io/milvus/internal/proto/internalpb"
	"github.com/milvus-io/milvus/internal/proto/querypb"
	"github.com/milvus-io/milvus/internal/rootcoord"
	"github.com/milvus-io/milvus/internal/storage"
	"github.com/milvus-io/milvus/internal/types"
	"github.com/milvus-io/milvus/internal/util/funcutil"
)

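// timeoutForEachRead is the timeout for a single read from the msgstream reader in FromDmlCPLoadDelete.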
const timeoutForEachRead = 10 * time.Second

// segmentLoader is only responsible for loading the field data from binlog
type segmentLoader struct {
	historicalReplica ReplicaInterface
	streamingReplica  ReplicaInterface

	dataCoord types.DataCoord

	minioKV kv.DataKV // minio kv used to load binlog, stats and delta files
	etcdKV  *etcdkv.EtcdKV

	indexLoader *indexLoader

	factory msgstream.Factory
}

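// loadSegment loads the segments described in the LoadSegmentsRequest: it estimates their sizes,
// checks the memory limit, loads field data, statistics and indexes, and finally registers the
// new segments in the streaming or historical replica according to segmentType.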
func (loader *segmentLoader) loadSegment(req *querypb.LoadSegmentsRequest, segmentType segmentType) error {
	// no segments need to be loaded, return directly
	if len(req.Infos) == 0 {
		return nil
	}

	log.Debug("segmentLoader start loading...",
		zap.Any("collectionID", req.CollectionID),
		zap.Any("numOfSegments", len(req.Infos)),
		zap.Any("loadType", segmentType),
	)

	newSegments := make(map[UniqueID]*Segment)
	segmentGC := func() {
		for _, s := range newSegments {
			deleteSegment(s)
		}
	}

	segmentFieldBinLogs := make(map[UniqueID][]*datapb.FieldBinlog)
	segmentIndexedFieldIDs := make(map[UniqueID][]FieldID)
	segmentSizes := make(map[UniqueID]int64)

	// prepare and estimate segment sizes
	for _, info := range req.Infos {
		segmentID := info.SegmentID
		partitionID := info.PartitionID
		collectionID := info.CollectionID

		collection, err := loader.historicalReplica.getCollectionByID(collectionID)
		if err != nil {
			segmentGC()
			return err
		}
		segment := newSegment(collection, segmentID, partitionID, collectionID, "", segmentType, true)
		newSegments[segmentID] = segment
		fieldBinlog, indexedFieldID, err := loader.getFieldAndIndexInfo(segment, info)
		if err != nil {
			segmentGC()
			return err
		}
		segmentSize, err := loader.estimateSegmentSize(segment, fieldBinlog, indexedFieldID)
		if err != nil {
			segmentGC()
			return err
		}
		segmentFieldBinLogs[segmentID] = fieldBinlog
		segmentIndexedFieldIDs[segmentID] = indexedFieldID
		segmentSizes[segmentID] = segmentSize
	}

	// check memory limit
	err := loader.checkSegmentSize(req.Infos[0].CollectionID, segmentSizes)
	if err != nil {
		segmentGC()
		return err
	}

	// start to load
	for _, info := range req.Infos {
		segmentID := info.SegmentID
		if newSegments[segmentID] == nil || segmentFieldBinLogs[segmentID] == nil || segmentIndexedFieldIDs[segmentID] == nil {
			segmentGC()
			return errors.New(fmt.Sprintln("unexpected error, cannot find load infos, this error should not happen, collectionID = ", req.Infos[0].CollectionID))
		}
		err = loader.loadSegmentInternal(newSegments[segmentID],
			segmentFieldBinLogs[segmentID],
			segmentIndexedFieldIDs[segmentID],
			info,
			segmentType)
		if err != nil {
			segmentGC()
			return err
		}
	}

	// set segments
	switch segmentType {
	case segmentTypeGrowing:
		for _, s := range newSegments {
			err := loader.streamingReplica.setSegment(s)
			if err != nil {
				segmentGC()
				return err
			}
		}
	case segmentTypeSealed:
		for _, s := range newSegments {
			err := loader.historicalReplica.setSegment(s)
			if err != nil {
				segmentGC()
				return err
			}
		}
	default:
		err := errors.New(fmt.Sprintln("illegal segment type when load segment, collectionID = ", req.CollectionID))
		segmentGC()
		return err
	}
	return nil
}

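// loadSegmentInternal loads a single segment: it loads the field binlogs, rebuilds the primary key
// bloom filter from the stats logs, applies the delta (delete) logs, and loads the index for every
// indexed field.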
func (loader *segmentLoader) loadSegmentInternal(segment *Segment,
	fieldBinLogs []*datapb.FieldBinlog,
	indexFieldIDs []FieldID,
	segmentLoadInfo *querypb.SegmentLoadInfo,
	segmentType segmentType) error {
	log.Debug("loading insert...",
		zap.Any("collectionID", segment.collectionID),
		zap.Any("segmentID", segment.ID()),
		zap.Any("segmentType", segmentType),
		zap.Any("fieldBinLogs", fieldBinLogs),
		zap.Any("indexFieldIDs", indexFieldIDs),
	)
	err := loader.loadSegmentFieldsData(segment, fieldBinLogs, segmentType)
	if err != nil {
		return err
	}

	pkIDField, err := loader.historicalReplica.getPKFieldIDByCollectionID(segment.collectionID)
	if err != nil {
		return err
	}
	if pkIDField == common.InvalidFieldID {
		log.Warn("segment primary key field doesn't exist when load segment")
	} else {
		log.Debug("loading bloom filter...")
		pkStatsBinlogs := loader.filterPKStatsBinlogs(segmentLoadInfo.Statslogs, pkIDField)
		err = loader.loadSegmentBloomFilter(segment, pkStatsBinlogs)
		if err != nil {
			return err
		}
	}

	log.Debug("loading delta...")
	err = loader.loadDeltaLogs(segment, segmentLoadInfo.Deltalogs)
	if err != nil {
		return err
	}

	for _, id := range indexFieldIDs {
		log.Debug("loading index...")
		err = loader.indexLoader.loadIndex(segment, id)
		if err != nil {
			return err
		}
	}

	return nil
}

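// filterPKStatsBinlogs returns the stats binlog paths that belong to the primary key field.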
func (loader *segmentLoader) filterPKStatsBinlogs(fieldBinlogs []*datapb.FieldBinlog, pkFieldID int64) []string {
	result := make([]string, 0)
	for _, fieldBinlog := range fieldBinlogs {
		if fieldBinlog.FieldID == pkFieldID {
			result = append(result, fieldBinlog.Binlogs...)
		}
	}
	return result
}

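// filterFieldBinlogs drops the binlogs of the fields listed in skipFieldIDs and returns the remaining field binlogs.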
func (loader *segmentLoader) filterFieldBinlogs(fieldBinlogs []*datapb.FieldBinlog, skipFieldIDs []int64) []*datapb.FieldBinlog {
	result := make([]*datapb.FieldBinlog, 0)
	for _, fieldBinlog := range fieldBinlogs {
		if !funcutil.SliceContain(skipFieldIDs, fieldBinlog.FieldID) {
			result = append(result, fieldBinlog)
		}
	}
	return result
}

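// loadSegmentFieldsData downloads the field binlogs from minio, deserializes them with the insert
// codec, and feeds the data into the segment: row based for growing segments, column based for
// sealed segments.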
func (loader *segmentLoader) loadSegmentFieldsData(segment *Segment, fieldBinlogs []*datapb.FieldBinlog, segmentType segmentType) error {
	iCodec := storage.InsertCodec{}
	blobs := make([]*storage.Blob, 0)
	for _, fb := range fieldBinlogs {
		log.Debug("load segment fields data",
			zap.Int64("segmentID", segment.segmentID),
			zap.Any("fieldID", fb.FieldID),
			zap.String("paths", fmt.Sprintln(fb.Binlogs)),
		)
		for _, path := range fb.Binlogs {
			p := path
			binLog, err := loader.minioKV.Load(path)
			if err != nil {
				// TODO: return or continue?
				return err
			}
			blob := &storage.Blob{
				Key:   p,
				Value: []byte(binLog),
			}
			blobs = append(blobs, blob)
		}
	}

	_, _, insertData, err := iCodec.Deserialize(blobs)
	if err != nil {
		log.Warn(err.Error())
		return err
	}

	for i := range insertData.Infos {
		log.Debug("segmentLoader deserialize fields",
			zap.Any("collectionID", segment.collectionID),
			zap.Any("segmentID", segment.ID()),
			zap.Any("numRows", insertData.Infos[i].Length),
		)
	}

	switch segmentType {
	case segmentTypeGrowing:
		timestamps, ids, rowData, err := storage.TransferColumnBasedInsertDataToRowBased(insertData)
		if err != nil {
			return err
		}
		return loader.loadGrowingSegments(segment, ids, timestamps, rowData)
	case segmentTypeSealed:
		return loader.loadSealedSegments(segment, insertData)
	default:
		err := errors.New(fmt.Sprintln("illegal segment type when load segment, collectionID = ", segment.collectionID))
		return err
	}
}

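// loadGrowingSegments inserts the row based data into a growing segment: it reserves space with
// preInsert, updates the primary key bloom filter, and then performs the actual insert.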
func (loader *segmentLoader) loadGrowingSegments(segment *Segment,
	ids []UniqueID,
	timestamps []Timestamp,
	records []*commonpb.Blob) error {
	if len(ids) != len(timestamps) || len(timestamps) != len(records) {
		return errors.New(fmt.Sprintln("illegal insert data when load segment, collectionID = ", segment.collectionID))
	}

	log.Debug("start load growing segments...",
		zap.Any("collectionID", segment.collectionID),
		zap.Any("segmentID", segment.ID()),
		zap.Any("numRows", len(ids)),
	)

	// 1. do preInsert
	var numOfRecords = len(ids)
	offset, err := segment.segmentPreInsert(numOfRecords)
	if err != nil {
		return err
	}
	log.Debug("insertNode operator", zap.Int("insert size", numOfRecords), zap.Int64("insert offset", offset), zap.Int64("segment id", segment.ID()))

	// 2. update bloom filter
	tmpInsertMsg := &msgstream.InsertMsg{
		InsertRequest: internalpb.InsertRequest{
			CollectionID: segment.collectionID,
			Timestamps:   timestamps,
			RowIDs:       ids,
			RowData:      records,
		},
	}
	pks, err := getPrimaryKeys(tmpInsertMsg, loader.streamingReplica)
	if err != nil {
		return err
	}
	segment.updateBloomFilter(pks)

	// 3. do insert
	err = segment.segmentInsert(offset, &ids, &timestamps, &records)
	if err != nil {
		return err
	}
	log.Debug("Do insert done in segment loader", zap.Int("len", numOfRecords), zap.Int64("segmentID", segment.ID()), zap.Int64("collectionID", segment.collectionID))

	return nil
}

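// loadSealedSegments loads the deserialized column based insert data into a sealed segment field by field.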
func (loader *segmentLoader) loadSealedSegments(segment *Segment, insertData *storage.InsertData) error {
	log.Debug("start load sealed segments...",
		zap.Any("collectionID", segment.collectionID),
		zap.Any("segmentID", segment.ID()),
		zap.Any("numFields", len(insertData.Data)),
	)
	for fieldID, value := range insertData.Data {
		var numRows []int64
		var data interface{}
		switch fieldData := value.(type) {
		case *storage.BoolFieldData:
			numRows = fieldData.NumRows
			data = fieldData.Data
		case *storage.Int8FieldData:
			numRows = fieldData.NumRows
			data = fieldData.Data
		case *storage.Int16FieldData:
			numRows = fieldData.NumRows
			data = fieldData.Data
		case *storage.Int32FieldData:
			numRows = fieldData.NumRows
			data = fieldData.Data
		case *storage.Int64FieldData:
			numRows = fieldData.NumRows
			data = fieldData.Data
		case *storage.FloatFieldData:
			numRows = fieldData.NumRows
			data = fieldData.Data
		case *storage.DoubleFieldData:
			numRows = fieldData.NumRows
			data = fieldData.Data
		case *storage.StringFieldData:
			numRows = fieldData.NumRows
			data = fieldData.Data
		case *storage.FloatVectorFieldData:
			numRows = fieldData.NumRows
			data = fieldData.Data
		case *storage.BinaryVectorFieldData:
			numRows = fieldData.NumRows
			data = fieldData.Data
		default:
			return errors.New("unexpected field data type")
		}
		if fieldID == common.TimeStampField {
			segment.setIDBinlogRowSizes(numRows)
		}
		totalNumRows := int64(0)
		for _, numRow := range numRows {
			totalNumRows += numRow
		}
		err := segment.segmentLoadFieldData(fieldID, int(totalNumRows), data)
		if err != nil {
			// TODO: return or continue?
			return err
		}
	}
	return nil
}

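// loadSegmentBloomFilter reads the primary key stats binlogs from minio and merges their bloom
// filters into the segment's pkFilter.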
func (loader *segmentLoader) loadSegmentBloomFilter(segment *Segment, binlogPaths []string) error {
	if len(binlogPaths) == 0 {
		log.Info("there are no stats logs saved with segment", zap.Any("segmentID", segment.segmentID))
		return nil
	}

	values, err := loader.minioKV.MultiLoad(binlogPaths)
	if err != nil {
		return err
	}
	blobs := make([]*storage.Blob, 0)
	for i := 0; i < len(values); i++ {
		blobs = append(blobs, &storage.Blob{Value: []byte(values[i])})
	}

	stats, err := storage.DeserializeStats(blobs)
	if err != nil {
		return err
	}
	for _, stat := range stats {
		if stat.BF == nil {
			log.Warn("stat log with nil bloom filter", zap.Int64("segmentID", segment.segmentID), zap.Any("stat", stat))
			continue
		}
		err = segment.pkFilter.Merge(stat.BF)
		if err != nil {
			return err
		}
	}
	return nil
}

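// loadDeltaLogs reads the delta (delete) binlogs from minio, deserializes them with the delete
// codec, and applies the deleted records to the segment.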
func (loader *segmentLoader) loadDeltaLogs(segment *Segment, deltaLogs []*datapb.DeltaLogInfo) error {
	if len(deltaLogs) == 0 {
		log.Info("there are no delta logs saved with segment", zap.Any("segmentID", segment.segmentID))
		return nil
	}
	dCodec := storage.DeleteCodec{}
	blobs := make([]*storage.Blob, 0)
	for _, deltaLog := range deltaLogs {
		value, err := loader.minioKV.Load(deltaLog.DeltaLogPath)
		if err != nil {
			return err
		}
		blob := &storage.Blob{
			Key:   deltaLog.DeltaLogPath,
			Value: []byte(value),
		}
		blobs = append(blobs, blob)
	}
	_, _, deltaData, err := dCodec.Deserialize(blobs)
	if err != nil {
		return err
	}

	err = segment.segmentLoadDeletedRecord(deltaData.Pks, deltaData.Tss, deltaData.RowCount)
	if err != nil {
		return err
	}
	return nil
}

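// FromDmlCPLoadDelete replays delete messages of a collection from the dml checkpoint position:
// it seeks a msgstream reader to the given position, collects the delete messages for this
// collection, and applies them to the segments of the historical replica.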
func (loader *segmentLoader) FromDmlCPLoadDelete(ctx context.Context, collectionID int64, position *internalpb.MsgPosition) error {
	log.Debug("from dml check point load delete", zap.Any("position", position), zap.Any("msg id", position.MsgID))
	stream, err := loader.factory.NewMsgStream(ctx)
	if err != nil {
		return err
	}
	pChannelName := rootcoord.ToPhysicalChannel(position.ChannelName)
	position.ChannelName = pChannelName
	stream.AsReader([]string{pChannelName}, fmt.Sprintf("querynode-%d-%d", Params.QueryNodeID, collectionID))
	stream.SeekReaders([]*internalpb.MsgPosition{position})

	delData := &deleteData{
		deleteIDs:        make(map[UniqueID][]int64),
		deleteTimestamps: make(map[UniqueID][]Timestamp),
		deleteOffset:     make(map[UniqueID]int64),
	}
	log.Debug("start read msg from stream reader")
	for stream.HasNext(pChannelName) {
		ctx, cancel := context.WithTimeout(ctx, timeoutForEachRead)
		tsMsg, err := stream.Next(ctx, pChannelName)
		if err != nil {
			cancel()
			return err
		}
		if tsMsg == nil {
			cancel()
			continue
		}

		if tsMsg.Type() == commonpb.MsgType_Delete {
			dmsg := tsMsg.(*msgstream.DeleteMsg)
			if dmsg.CollectionID != collectionID {
				cancel()
				continue
			}
			log.Debug("delete pk", zap.Any("pk", dmsg.PrimaryKeys))
			processDeleteMessages(loader.historicalReplica, dmsg, delData)
		}
		cancel()
	}
	log.Debug("All data has been read, there is no more data", zap.String("channel", pChannelName))
	for segmentID, pks := range delData.deleteIDs {
		segment, err := loader.historicalReplica.getSegmentByID(segmentID)
		if err != nil {
			log.Debug(err.Error())
			continue
		}
		offset := segment.segmentPreDelete(len(pks))
		delData.deleteOffset[segmentID] = offset
	}

	wg := sync.WaitGroup{}
	for segmentID := range delData.deleteOffset {
		wg.Add(1)
		go deletePk(loader.historicalReplica, delData, segmentID, &wg)
	}
	wg.Wait()
	stream.Close()
	log.Debug("from dml check point load done")
	return nil
}

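// deletePk applies the collected delete records of one segment to that segment; only sealed and
// indexing segments in the historical replica are touched.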
func deletePk(replica ReplicaInterface, deleteData *deleteData, segmentID UniqueID, wg *sync.WaitGroup) {
	defer wg.Done()
	log.Debug("QueryNode::iNode::delete", zap.Any("SegmentID", segmentID))
	targetSegment, err := replica.getSegmentByID(segmentID)
	if err != nil {
		log.Error(err.Error())
		return
	}

	if targetSegment.segmentType != segmentTypeSealed && targetSegment.segmentType != segmentTypeIndexing {
		return
	}

	ids := deleteData.deleteIDs[segmentID]
	timestamps := deleteData.deleteTimestamps[segmentID]
	offset := deleteData.deleteOffset[segmentID]

	err = targetSegment.segmentDelete(offset, &ids, &timestamps)
	if err != nil {
		log.Warn("QueryNode: targetSegmentDelete failed", zap.Error(err))
		return
	}
	log.Debug("Do delete done", zap.Int("len", len(deleteData.deleteIDs[segmentID])), zap.Int64("segmentID", segmentID), zap.Any("segmentType", targetSegment.segmentType))
}

// JoinIDPath joins ids to path format.
func JoinIDPath(ids ...UniqueID) string {
	idStr := make([]string, 0, len(ids))
	for _, id := range ids {
		idStr = append(idStr, strconv.FormatInt(id, 10))
	}
	return path.Join(idStr...)
}

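// getFieldAndIndexInfo collects, for one segment, the field binlogs that still need to be loaded
// as raw data and the field IDs that already have an index; vector field binlog info is attached
// to the segment along the way.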
func (loader *segmentLoader) getFieldAndIndexInfo(segment *Segment,
	segmentLoadInfo *querypb.SegmentLoadInfo) ([]*datapb.FieldBinlog, []FieldID, error) {
	collectionID := segment.collectionID
	vectorFieldIDs, err := loader.historicalReplica.getVecFieldIDsByCollectionID(collectionID)
	if err != nil {
		return nil, nil, err
	}
	if len(vectorFieldIDs) <= 0 {
		return nil, nil, fmt.Errorf("no vector field in collection %d", collectionID)
	}

	// add VectorFieldInfo for vector fields
	for _, fieldBinlog := range segmentLoadInfo.BinlogPaths {
		if funcutil.SliceContain(vectorFieldIDs, fieldBinlog.FieldID) {
			vectorFieldInfo := newVectorFieldInfo(fieldBinlog)
			segment.setVectorFieldInfo(fieldBinlog.FieldID, vectorFieldInfo)
		}
	}

	indexedFieldIDs := make([]FieldID, 0)
	if idxInfo, err := loader.indexLoader.getIndexInfo(collectionID, segment); err != nil {
		log.Warn(err.Error())
	} else {
		loader.indexLoader.setIndexInfo(segment, idxInfo)
		indexedFieldIDs = append(indexedFieldIDs, idxInfo.fieldID)
	}

	// we don't need to load raw data for indexed vector fields
	fieldBinlogs := loader.filterFieldBinlogs(segmentLoadInfo.BinlogPaths, indexedFieldIDs)
	return fieldBinlogs, indexedFieldIDs, nil
}

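// estimateSegmentSize estimates the in-memory size of a segment from its field binlogs and index
// binlogs, falling back to the raw binlog file size when the memory size cannot be estimated.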
func (loader *segmentLoader) estimateSegmentSize(segment *Segment,
	fieldBinLogs []*datapb.FieldBinlog,
	indexFieldIDs []FieldID) (int64, error) {
	segmentSize := int64(0)
	// get field data size; if len(indexFieldIDs) == 0, the vector fields are included in fieldBinLogs
	for _, fb := range fieldBinLogs {
		log.Debug("estimate segment fields size",
			zap.Any("collectionID", segment.collectionID),
			zap.Any("segmentID", segment.ID()),
			zap.Any("fieldID", fb.FieldID),
			zap.Any("paths", fb.Binlogs),
		)
		for _, binlogPath := range fb.Binlogs {
			logSize, err := storage.EstimateMemorySize(loader.minioKV, binlogPath)
			if err != nil {
				logSize, err = storage.GetBinlogSize(loader.minioKV, binlogPath)
				if err != nil {
					return 0, err
				}
			}
			segmentSize += logSize
		}
	}
	// get index size
	for _, fieldID := range indexFieldIDs {
		indexSize, err := loader.indexLoader.estimateIndexBinlogSize(segment, fieldID)
		if err != nil {
			return 0, err
		}
		segmentSize += indexSize
	}
	return segmentSize, nil
}

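// checkSegmentSize checks whether loading the given segments would push memory usage over
// Params.OverloadedMemoryThresholdPercentage of total memory, and returns an error if it would.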
func (loader *segmentLoader) checkSegmentSize(collectionID UniqueID, segmentSizes map[UniqueID]int64) error {
	usedMem, err := getUsedMemory()
	if err != nil {
		return err
	}
	totalMem, err := getTotalMemory()
	if err != nil {
		return err
	}

	segmentTotalSize := int64(0)
	for _, size := range segmentSizes {
		segmentTotalSize += size
	}

	for segmentID, size := range segmentSizes {
		log.Debug("memory stats when load segment",
			zap.Any("collectionIDs", collectionID),
			zap.Any("segmentID", segmentID),
			zap.Any("totalMem", totalMem),
			zap.Any("usedMem", usedMem),
			zap.Any("segmentTotalSize", segmentTotalSize),
			zap.Any("currentSegmentSize", size),
			zap.Any("thresholdFactor", Params.OverloadedMemoryThresholdPercentage),
		)
		if int64(usedMem)+segmentTotalSize+size > int64(float64(totalMem)*Params.OverloadedMemoryThresholdPercentage) {
			return errors.New(fmt.Sprintln("load segment failed, OOM if load, "+
				"collectionID = ", collectionID, ", ",
				"usedMem = ", usedMem, ", ",
				"segmentTotalSize = ", segmentTotalSize, ", ",
				"currentSegmentSize = ", size, ", ",
				"totalMem = ", totalMem, ", ",
				"thresholdFactor = ", Params.OverloadedMemoryThresholdPercentage,
			))
		}
	}

	return nil
}

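// newSegmentLoader creates a segmentLoader with a minio kv client built from Params and an
// indexLoader for loading index files.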
func newSegmentLoader(ctx context.Context,
	rootCoord types.RootCoord,
	indexCoord types.IndexCoord,
	historicalReplica ReplicaInterface,
	streamingReplica ReplicaInterface,
	etcdKV *etcdkv.EtcdKV,
	factory msgstream.Factory) *segmentLoader {
	option := &minioKV.Option{
		Address:           Params.MinioEndPoint,
		AccessKeyID:       Params.MinioAccessKeyID,
		SecretAccessKeyID: Params.MinioSecretAccessKey,
		UseSSL:            Params.MinioUseSSLStr,
		CreateBucket:      true,
		BucketName:        Params.MinioBucketName,
	}

	client, err := minioKV.NewMinIOKV(ctx, option)
	if err != nil {
		panic(err)
	}

	iLoader := newIndexLoader(ctx, rootCoord, indexCoord, historicalReplica)
	return &segmentLoader{
		historicalReplica: historicalReplica,
		streamingReplica:  streamingReplica,

		minioKV: client,
		etcdKV:  etcdKV,

		indexLoader: iLoader,

		factory: factory,
	}
}