2020-12-10 17:55:55 +08:00
|
|
|
package indexbuilder
|
|
|
|
|
|
|
|
import (
|
|
|
|
"context"
|
2020-12-13 06:48:05 +08:00
|
|
|
"log"
|
2020-12-22 08:14:36 +08:00
|
|
|
"strconv"
|
2020-12-13 06:48:05 +08:00
|
|
|
"time"
|
|
|
|
|
2020-12-10 17:55:55 +08:00
|
|
|
"github.com/zilliztech/milvus-distributed/internal/allocator"
|
|
|
|
"github.com/zilliztech/milvus-distributed/internal/errors"
|
2020-12-22 08:14:36 +08:00
|
|
|
"github.com/zilliztech/milvus-distributed/internal/kv"
|
2020-12-13 06:48:05 +08:00
|
|
|
"github.com/zilliztech/milvus-distributed/internal/proto/indexbuilderpb"
|
2020-12-22 08:14:36 +08:00
|
|
|
"github.com/zilliztech/milvus-distributed/internal/storage"
|
2020-12-10 17:55:55 +08:00
|
|
|
)
|
|
|
|
|
|
|
|
// task abstracts a unit of work handled by the index builder's task
// queues. A task flows through the lifecycle
// OnEnqueue -> PreExecute -> Execute -> PostExecute, while the submitter
// blocks in WaitToFinish until Notify delivers the final error.
type task interface {
	ID() UniqueID       // return ReqID
	SetID(uid UniqueID) // set ReqID
	PreExecute() error  // validation/preparation before Execute
	Execute() error     // the actual work
	PostExecute() error // bookkeeping after Execute succeeds
	WaitToFinish() error
	Notify(err error)
	OnEnqueue() error // hook invoked when the task enters a queue
}
|
|
|
|
|
|
|
|
// BaseTask carries the state shared by every index-builder task:
// completion signalling, cancellation context, request ID, and the
// meta table used to persist index state.
type BaseTask struct {
	done  chan error      // receives the final error from Notify
	ctx   context.Context // bounds the task's lifetime (timeout/cancel)
	id    UniqueID        // request ID assigned by the scheduler
	table *metaTable      // persistent index metadata store
}
|
|
|
|
|
|
|
|
// ID returns the request ID assigned to this task.
func (bt *BaseTask) ID() UniqueID {
	return bt.id
}
|
|
|
|
|
|
|
|
// setID records the request ID; concrete task types expose it through
// their exported SetID methods.
func (bt *BaseTask) setID(id UniqueID) {
	bt.id = id
}
|
|
|
|
|
|
|
|
func (bt *BaseTask) WaitToFinish() error {
|
2020-12-13 06:48:05 +08:00
|
|
|
select {
|
|
|
|
case <-bt.ctx.Done():
|
|
|
|
return errors.New("timeout")
|
|
|
|
case err := <-bt.done:
|
|
|
|
return err
|
2020-12-10 17:55:55 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Notify delivers the task's final error (nil on success) to the
// goroutine blocked in WaitToFinish. If the done channel is unbuffered,
// this blocks until a receiver is ready.
func (bt *BaseTask) Notify(err error) {
	bt.done <- err
}
|
|
|
|
|
2020-12-13 06:48:05 +08:00
|
|
|
// IndexAddTask registers a new build-index request: it allocates an
// index ID, records the request in the meta table, and hands off an
// IndexBuildTask to the build queue.
type IndexAddTask struct {
	BaseTask
	req         *indexbuilderpb.BuildIndexRequest // the incoming build request
	indexID     UniqueID                          // ID allocated for the new index
	idAllocator *allocator.IDAllocator            // source of fresh index IDs
	buildQueue  TaskQueue                         // queue that runs the actual build
	kv          kv.Base                           // storage handle passed to the build task
}
|
|
|
|
|
2020-12-13 06:48:05 +08:00
|
|
|
// SetID implements task by delegating to the embedded BaseTask.
func (it *IndexAddTask) SetID(ID UniqueID) {
	it.BaseTask.setID(ID)
}
|
2020-12-10 17:55:55 +08:00
|
|
|
|
2020-12-13 06:48:05 +08:00
|
|
|
func (it *IndexAddTask) OnEnqueue() error {
|
|
|
|
var err error
|
|
|
|
it.indexID, err = it.idAllocator.AllocOne()
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2020-12-10 17:55:55 +08:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2020-12-13 06:48:05 +08:00
|
|
|
func (it *IndexAddTask) PreExecute() error {
|
|
|
|
log.Println("pretend to check Index Req")
|
|
|
|
err := it.table.AddIndex(it.indexID, it.req)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2020-12-10 17:55:55 +08:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2020-12-13 06:48:05 +08:00
|
|
|
func (it *IndexAddTask) Execute() error {
|
|
|
|
t := newIndexBuildTask()
|
|
|
|
t.table = it.table
|
|
|
|
t.indexID = it.indexID
|
2020-12-22 08:14:36 +08:00
|
|
|
t.kv = it.kv
|
2021-01-05 15:14:59 +08:00
|
|
|
t.req = it.req
|
2020-12-13 06:48:05 +08:00
|
|
|
var cancel func()
|
|
|
|
t.ctx, cancel = context.WithTimeout(it.ctx, reqTimeoutInterval)
|
|
|
|
defer cancel()
|
|
|
|
|
|
|
|
fn := func() error {
|
|
|
|
select {
|
|
|
|
case <-t.ctx.Done():
|
|
|
|
return errors.New("index add timeout")
|
|
|
|
default:
|
|
|
|
return it.buildQueue.Enqueue(t)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return fn()
|
|
|
|
}
|
|
|
|
|
|
|
|
// PostExecute is a no-op for IndexAddTask; all bookkeeping happens in
// PreExecute/Execute.
func (it *IndexAddTask) PostExecute() error {
	return nil
}
|
|
|
|
|
2020-12-13 06:48:05 +08:00
|
|
|
func NewIndexAddTask() *IndexAddTask {
|
|
|
|
return &IndexAddTask{
|
|
|
|
BaseTask: BaseTask{
|
|
|
|
done: make(chan error),
|
|
|
|
},
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// IndexBuildTask performs the actual index build: it loads the raw
// segment data from kv storage, builds the vector index, serializes it,
// and writes the result back, recording the save paths.
type IndexBuildTask struct {
	BaseTask
	index     Index                             // the index being built
	indexID   UniqueID                          // ID of the index under construction
	kv        kv.Base                           // storage for loading data and saving the index
	savePaths []string                          // kv paths of the serialized index blobs
	req       *indexbuilderpb.BuildIndexRequest // the originating build request
}
|
|
|
|
|
2020-12-13 06:48:05 +08:00
|
|
|
func newIndexBuildTask() *IndexBuildTask {
|
|
|
|
return &IndexBuildTask{
|
|
|
|
BaseTask: BaseTask{
|
|
|
|
done: make(chan error, 1), // intend to do this
|
|
|
|
},
|
|
|
|
}
|
2020-12-10 17:55:55 +08:00
|
|
|
}
|
|
|
|
|
2020-12-13 06:48:05 +08:00
|
|
|
// SetID implements task by delegating to the embedded BaseTask.
func (it *IndexBuildTask) SetID(ID UniqueID) {
	it.BaseTask.setID(ID)
}
|
|
|
|
|
|
|
|
// OnEnqueue stamps the index's enqueue time in the meta table when the
// build task enters the build queue.
func (it *IndexBuildTask) OnEnqueue() error {
	return it.table.UpdateIndexEnqueTime(it.indexID, time.Now())
}
|
|
|
|
|
2020-12-13 06:48:05 +08:00
|
|
|
// PreExecute stamps the index's schedule time in the meta table just
// before the build starts.
func (it *IndexBuildTask) PreExecute() error {
	return it.table.UpdateIndexScheduleTime(it.indexID, time.Now())
}
|
|
|
|
|
|
|
|
func (it *IndexBuildTask) Execute() error {
|
|
|
|
err := it.table.UpdateIndexStatus(it.indexID, indexbuilderpb.IndexStatus_INPROGRESS)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2020-12-22 08:14:36 +08:00
|
|
|
|
|
|
|
typeParams := make(map[string]string)
|
2021-01-05 15:14:59 +08:00
|
|
|
for _, kvPair := range it.req.GetTypeParams() {
|
2020-12-22 08:14:36 +08:00
|
|
|
key, value := kvPair.GetKey(), kvPair.GetValue()
|
|
|
|
_, ok := typeParams[key]
|
|
|
|
if ok {
|
|
|
|
return errors.New("duplicated key in type params")
|
|
|
|
}
|
|
|
|
typeParams[key] = value
|
|
|
|
}
|
|
|
|
|
|
|
|
indexParams := make(map[string]string)
|
2021-01-05 15:14:59 +08:00
|
|
|
for _, kvPair := range it.req.GetIndexParams() {
|
2020-12-22 08:14:36 +08:00
|
|
|
key, value := kvPair.GetKey(), kvPair.GetValue()
|
|
|
|
_, ok := indexParams[key]
|
|
|
|
if ok {
|
|
|
|
return errors.New("duplicated key in index params")
|
|
|
|
}
|
|
|
|
indexParams[key] = value
|
|
|
|
}
|
|
|
|
|
|
|
|
it.index, err = NewCIndex(typeParams, indexParams)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
getKeyByPathNaive := func(path string) string {
|
|
|
|
// splitElements := strings.Split(path, "/")
|
|
|
|
// return splitElements[len(splitElements)-1]
|
|
|
|
return path
|
|
|
|
}
|
|
|
|
getValueByPath := func(path string) ([]byte, error) {
|
|
|
|
data, err := it.kv.Load(path)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
return []byte(data), nil
|
|
|
|
}
|
|
|
|
getBlobByPath := func(path string) (*Blob, error) {
|
|
|
|
value, err := getValueByPath(path)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
return &Blob{
|
|
|
|
Key: getKeyByPathNaive(path),
|
|
|
|
Value: value,
|
|
|
|
}, nil
|
|
|
|
}
|
|
|
|
getStorageBlobs := func(blobs []*Blob) []*storage.Blob {
|
2020-12-25 11:10:31 +08:00
|
|
|
return blobs
|
2020-12-22 08:14:36 +08:00
|
|
|
}
|
|
|
|
|
2021-01-05 15:14:59 +08:00
|
|
|
toLoadDataPaths := it.req.GetDataPaths()
|
2020-12-22 08:14:36 +08:00
|
|
|
keys := make([]string, 0)
|
|
|
|
blobs := make([]*Blob, 0)
|
|
|
|
for _, path := range toLoadDataPaths {
|
|
|
|
keys = append(keys, getKeyByPathNaive(path))
|
|
|
|
blob, err := getBlobByPath(path)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
blobs = append(blobs, blob)
|
|
|
|
}
|
|
|
|
|
|
|
|
storageBlobs := getStorageBlobs(blobs)
|
|
|
|
var insertCodec storage.InsertCodec
|
|
|
|
partitionID, segmentID, insertData, err := insertCodec.Deserialize(storageBlobs)
|
|
|
|
if len(insertData.Data) != 1 {
|
|
|
|
return errors.New("we expect only one field in deserialized insert data")
|
|
|
|
}
|
|
|
|
|
|
|
|
for _, value := range insertData.Data {
|
|
|
|
// TODO: BinaryVectorFieldData
|
2021-01-07 15:39:20 +08:00
|
|
|
floatVectorFieldData, fOk := value.(*storage.FloatVectorFieldData)
|
|
|
|
if fOk {
|
|
|
|
err = it.index.BuildFloatVecIndexWithoutIds(floatVectorFieldData.Data)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2021-01-07 14:56:17 +08:00
|
|
|
}
|
|
|
|
|
2021-01-07 15:39:20 +08:00
|
|
|
binaryVectorFieldData, bOk := value.(*storage.BinaryVectorFieldData)
|
|
|
|
if bOk {
|
|
|
|
err = it.index.BuildBinaryVecIndexWithoutIds(binaryVectorFieldData.Data)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-01-07 16:13:28 +08:00
|
|
|
if !fOk && !bOk {
|
2021-01-07 15:39:20 +08:00
|
|
|
return errors.New("we expect FloatVectorFieldData or BinaryVectorFieldData")
|
2020-12-22 08:14:36 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
indexBlobs, err := it.index.Serialize()
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
var indexCodec storage.IndexCodec
|
|
|
|
serializedIndexBlobs, err := indexCodec.Serialize(getStorageBlobs(indexBlobs))
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
getSavePathByKey := func(key string) string {
|
|
|
|
// TODO: fix me, use more reasonable method
|
|
|
|
return strconv.Itoa(int(it.indexID)) + "/" + strconv.Itoa(int(partitionID)) + "/" + strconv.Itoa(int(segmentID)) + "/" + key
|
|
|
|
}
|
|
|
|
saveBlob := func(path string, value []byte) error {
|
|
|
|
return it.kv.Save(path, string(value))
|
|
|
|
}
|
|
|
|
|
|
|
|
it.savePaths = make([]string, 0)
|
|
|
|
for _, blob := range serializedIndexBlobs {
|
2020-12-25 11:10:31 +08:00
|
|
|
key, value := blob.Key, blob.Value
|
2020-12-22 08:14:36 +08:00
|
|
|
savePath := getSavePathByKey(key)
|
|
|
|
err := saveBlob(savePath, value)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
it.savePaths = append(it.savePaths, savePath)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return it.index.Delete()
|
2020-12-10 17:55:55 +08:00
|
|
|
}
|
2020-12-13 06:48:05 +08:00
|
|
|
|
|
|
|
// PostExecute marks the index complete in the meta table, recording the
// kv paths of the serialized index blobs produced by Execute.
func (it *IndexBuildTask) PostExecute() error {
	return it.table.CompleteIndex(it.indexID, it.savePaths)
}
|