// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package datanode

import (
	"bytes"
	"context"
	"errors"
	"path"
	"strconv"
	"time"

	"github.com/milvus-io/milvus/internal/common"
	"github.com/milvus-io/milvus/internal/kv"
	"github.com/milvus-io/milvus/internal/log"
	"github.com/milvus-io/milvus/internal/proto/datapb"
	"github.com/milvus-io/milvus/internal/proto/etcdpb"
	"github.com/milvus-io/milvus/internal/storage"
	"go.uber.org/zap"
	"golang.org/x/sync/errgroup"
)

var (
	errUploadToBlobStorage     = errors.New("upload to blob storage wrong")
	errDownloadFromBlobStorage = errors.New("download from blob storage wrong")
)

type downloader interface {
	// download downloads insert-binlogs, stats-binlogs, and delta-binlogs
	// from blob storage for the given paths. The paths are one group of
	// binlog paths generated by one `Serialize` call.
	download(ctx context.Context, paths []string) ([]*Blob, error)
}

type uploader interface {
	// upload saves InsertData and DeleteData into blob storage.
	// Stats-binlogs are generated from the InsertData.
	upload(ctx context.Context, segID, partID UniqueID, iData []*InsertData, dData *DeleteData, meta *etcdpb.CollectionMeta) (*cpaths, error)
}

// binlogIO implements both downloader and uploader on top of the kv-based
// blob storage abstraction and an ID allocator.
type binlogIO struct {
	kv.BaseKV
	allocatorInterface
}

var _ downloader = (*binlogIO)(nil)
var _ uploader = (*binlogIO)(nil)

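// A minimal construction-and-use sketch (hypothetical: the concrete
// kv.BaseKV and allocatorInterface values, here called minioKV and
// idAllocator, are wired up elsewhere in the datanode package):
//
//	b := &binlogIO{minioKV, idAllocator}
//	blobs, downloadErr := b.download(ctx, paths)
//	cp, uploadErr := b.upload(ctx, segID, partID, iDatas, dData, meta)
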
// download retries MultiLoad every 50ms until it succeeds or the context is
// cancelled. errStart is a package-level sentinel marking the first
// iteration, so the initial attempt runs without a delay.
func (b *binlogIO) download(ctx context.Context, paths []string) ([]*Blob, error) {
	var (
		err = errStart
		vs  = []string{}
	)

	g, gCtx := errgroup.WithContext(ctx)
	g.Go(func() error {
		for err != nil {
			select {

			case <-gCtx.Done():
				log.Warn("ctx done when downloading kvs from blob storage")
				return errDownloadFromBlobStorage

			default:
				if err != errStart {
					<-time.After(50 * time.Millisecond)
					log.Warn("Try multiloading again", zap.Strings("paths", paths))
				}
				vs, err = b.MultiLoad(paths)
			}
		}
		return nil
	})

	if err := g.Wait(); err != nil {
		return nil, err
	}

	// Copy each loaded string payload into a Blob's byte slice.
	rst := make([]*Blob, len(vs))
	for i := range rst {
		rst[i] = &Blob{Value: bytes.NewBufferString(vs[i]).Bytes()}
	}

	return rst, nil
}

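// The retry loop above is duplicated in upload below. A hypothetical
// generic helper showing the same shape (illustrative only, not part of
// this file):
//
//	func retryUntilDone(ctx context.Context, interval time.Duration, op func() error) error {
//		for {
//			select {
//			case <-ctx.Done():
//				return ctx.Err()
//			default:
//				if err := op(); err == nil {
//					return nil
//				}
//				<-time.After(interval)
//			}
//		}
//	}
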
// cpaths bundles the binlog paths produced by one upload call.
type cpaths struct {
	inPaths    []*datapb.FieldBinlog // insert-binlog paths, grouped by FieldID
	statsPaths []*datapb.FieldBinlog // stats-binlog paths, grouped by FieldID
	deltaInfo  []*datapb.FieldBinlog // delta-binlog paths, empty when there are no deletes
}

func (b *binlogIO) upload(
	ctx context.Context,
	segID, partID UniqueID,
	iDatas []*InsertData,
	dData *DeleteData,
	meta *etcdpb.CollectionMeta) (*cpaths, error) {

	p := &cpaths{}

	var (
		inPathm    = make(map[UniqueID]*datapb.FieldBinlog) // FieldID -> its FieldBinlog
		statsPathm = make(map[UniqueID]*datapb.FieldBinlog) // FieldID -> its statsBinlog
		kvs        = make(map[string]string)
	)

	for _, iData := range iDatas {
		tf, ok := iData.Data[common.TimeStampField]
		if !ok || tf.RowNum() == 0 {
			log.Warn("binlog io uploading empty insert data",
				zap.Int64("segmentID", segID),
				zap.Int64("collectionID", meta.GetID()),
			)
			continue
		}

		kv, inpaths, statspaths, err := b.genInsertBlobs(iData, partID, segID, meta)
		if err != nil {
			log.Warn("generate insert blobs wrong", zap.Error(err))
			return nil, err
		}

		for k, v := range kv {
			kvs[k] = v
		}

		// Merge this batch's insert-binlog paths into the per-field map.
		for fID, fieldBinlog := range inpaths {
			tmpfb, ok := inPathm[fID]
			if !ok {
				tmpfb = fieldBinlog
			} else {
				tmpfb.Binlogs = append(tmpfb.Binlogs, fieldBinlog.GetBinlogs()...)
			}
			inPathm[fID] = tmpfb
		}

		// Merge this batch's stats-binlog paths into the per-field map.
		for fID, fieldBinlog := range statspaths {
			tmpfb, ok := statsPathm[fID]
			if !ok {
				tmpfb = fieldBinlog
			} else {
				tmpfb.Binlogs = append(tmpfb.Binlogs, fieldBinlog.GetBinlogs()...)
			}
			statsPathm[fID] = tmpfb
		}
	}

	for _, bs := range inPathm {
		p.inPaths = append(p.inPaths, bs)
	}

	for _, bs := range statsPathm {
		p.statsPaths = append(p.statsPaths, bs)
	}

	// Generate a delta blob only when there are delete logs to record.
	if dData.RowCount > 0 {
		k, v, err := b.genDeltaBlobs(dData, meta.GetID(), partID, segID)
		if err != nil {
			log.Warn("generate delta blobs wrong", zap.Error(err))
			return nil, err
		}

		kvs[k] = bytes.NewBuffer(v).String()
		p.deltaInfo = append(p.deltaInfo, &datapb.FieldBinlog{
			// Field id shall be primary key id
			Binlogs: []*datapb.Binlog{
				{
					EntriesNum: dData.RowCount,
					LogPath:    k,
					LogSize:    int64(len(v)),
				},
			},
		})
	}

	// Persist all generated binlogs, retrying every 50ms until MultiSave
	// succeeds or the context is cancelled (same pattern as download).
	var err = errStart
	g, gCtx := errgroup.WithContext(ctx)
	g.Go(func() error {
		for err != nil {
			select {
			case <-gCtx.Done():
				log.Warn("ctx done when saving kvs to blob storage")
				return errUploadToBlobStorage
			default:
				if err != errStart {
					<-time.After(50 * time.Millisecond)
					log.Info("retry save binlogs")
				}
				err = b.MultiSave(kvs)
			}
		}
		return nil
	})

	if err := g.Wait(); err != nil {
		return nil, err
	}

	return p, nil
}

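// A hypothetical call site (names are illustrative; the real callers live
// elsewhere in the datanode package):
//
//	cp, err := b.upload(ctx, segID, partID, []*InsertData{iData}, dData, meta)
//	if err != nil {
//		return err
//	}
//	// cp.inPaths, cp.statsPaths, and cp.deltaInfo are typically reported
//	// back to the coordinator alongside the segment metadata.
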
// genDeltaBlobs serializes the DeleteData and returns the generated storage
// key and the serialized value.
func (b *binlogIO) genDeltaBlobs(data *DeleteData, collID, partID, segID UniqueID) (string, []byte, error) {
	dCodec := storage.NewDeleteCodec()

	blob, err := dCodec.Serialize(collID, partID, segID, data)
	if err != nil {
		return "", nil, err
	}

	k, err := b.genKey(collID, partID, segID)
	if err != nil {
		return "", nil, err
	}

	key := path.Join(Params.DataNodeCfg.DeleteBinlogRootPath, k)

	return key, blob.GetValue(), nil
}

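// Assuming genKey joins its ID arguments into a slash-separated path (and
// may append a freshly allocated log ID), a delta-log key has roughly this
// shape (illustrative, not a contract):
//
//	<DeleteBinlogRootPath>/<collID>/<partID>/<segID>/<logID>
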
// genInsertBlobs serializes the InsertData and returns the kvs to save, the
// insert-binlog paths, and the stats-binlog paths, each keyed by FieldID.
func (b *binlogIO) genInsertBlobs(data *InsertData, partID, segID UniqueID, meta *etcdpb.CollectionMeta) (map[string]string, map[UniqueID]*datapb.FieldBinlog, map[UniqueID]*datapb.FieldBinlog, error) {
	inCodec := storage.NewInsertCodec(meta)
	inlogs, statslogs, err := inCodec.Serialize(partID, segID, data)
	if err != nil {
		return nil, nil, nil, err
	}

	var (
		kvs        = make(map[string]string, len(inlogs)+len(statslogs))
		inpaths    = make(map[UniqueID]*datapb.FieldBinlog)
		statspaths = make(map[UniqueID]*datapb.FieldBinlog)
	)

	notifyGenIdx := make(chan struct{})
	defer close(notifyGenIdx)

	// Allocate one log ID per blob up front; closing notifyGenIdx on return
	// stops the generator goroutine.
	generator, err := b.idxGenerator(len(inlogs)+len(statslogs), notifyGenIdx)
	if err != nil {
		return nil, nil, nil, err
	}

	for _, blob := range inlogs {
		// The blob key is generated by Serialize from an int64 fieldID in the
		// collection schema, so ParseInt cannot fail here.
		fID, _ := strconv.ParseInt(blob.GetKey(), 10, 64)
		k := JoinIDPath(meta.GetID(), partID, segID, fID, <-generator)
		key := path.Join(Params.DataNodeCfg.InsertBinlogRootPath, k)

		value := bytes.NewBuffer(blob.GetValue()).String()
		fileLen := len(value)

		kvs[key] = value
		inpaths[fID] = &datapb.FieldBinlog{
			FieldID: fID,
			Binlogs: []*datapb.Binlog{{LogSize: int64(fileLen), LogPath: key}},
		}
	}

	for _, blob := range statslogs {
		// Same reasoning as above: the key is a schema fieldID, so ParseInt
		// cannot fail.
		fID, _ := strconv.ParseInt(blob.GetKey(), 10, 64)

		k := JoinIDPath(meta.GetID(), partID, segID, fID, <-generator)
		key := path.Join(Params.DataNodeCfg.StatsBinlogRootPath, k)

		value := bytes.NewBuffer(blob.GetValue()).String()
		fileLen := len(value)

		kvs[key] = value
		statspaths[fID] = &datapb.FieldBinlog{
			FieldID: fID,
			Binlogs: []*datapb.Binlog{{LogSize: int64(fileLen), LogPath: key}},
		}
	}

	return kvs, inpaths, statspaths, nil
}

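// With JoinIDPath joining its arguments into a slash-separated path, an
// insert-binlog key has this shape (stats-binlogs are analogous under
// StatsBinlogRootPath; illustrative only):
//
//	<InsertBinlogRootPath>/<collID>/<partID>/<segID>/<fieldID>/<logID>
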
// idxGenerator allocates a contiguous batch of n unique IDs and streams them
// through the returned channel. Closing done stops the producer early; the
// channel is closed once all n IDs are sent or done fires.
func (b *binlogIO) idxGenerator(n int, done <-chan struct{}) (<-chan UniqueID, error) {
	idStart, _, err := b.allocIDBatch(uint32(n))
	if err != nil {
		return nil, err
	}

	rt := make(chan UniqueID)
	go func(rt chan<- UniqueID) {
		for i := 0; i < n; i++ {
			select {
			case <-done:
				close(rt)
				return
			case rt <- idStart + UniqueID(i):
			}
		}
		close(rt)
	}(rt)

	return rt, nil
}

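// Usage sketch (hypothetical; mirrors how genInsertBlobs consumes the
// generator):
//
//	done := make(chan struct{})
//	defer close(done)
//	gen, err := b.idxGenerator(2, done)
//	if err != nil {
//		return err
//	}
//	first, second := <-gen, <-gen
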
// close delegates to the embedded kv store's Close.
func (b *binlogIO) close() {
	b.Close()
}