2021-12-29 22:42:14 +08:00
|
|
|
|
// Licensed to the LF AI & Data foundation under one
|
|
|
|
|
// or more contributor license agreements. See the NOTICE file
|
|
|
|
|
// distributed with this work for additional information
|
|
|
|
|
// regarding copyright ownership. The ASF licenses this file
|
|
|
|
|
// to you under the Apache License, Version 2.0 (the
|
|
|
|
|
// "License"); you may not use this file except in compliance
|
2021-07-20 15:06:09 +08:00
|
|
|
|
// with the License. You may obtain a copy of the License at
|
|
|
|
|
//
|
2021-12-29 22:42:14 +08:00
|
|
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
2021-07-20 15:06:09 +08:00
|
|
|
|
//
|
2021-12-29 22:42:14 +08:00
|
|
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
|
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
|
// See the License for the specific language governing permissions and
|
|
|
|
|
// limitations under the License.
|
2021-07-20 15:06:09 +08:00
|
|
|
|
|
|
|
|
|
package storage
|
|
|
|
|
|
|
|
|
|
import (
|
2021-07-24 09:25:22 +08:00
|
|
|
|
"bytes"
|
2021-07-20 15:06:09 +08:00
|
|
|
|
"encoding/binary"
|
2021-07-24 09:25:22 +08:00
|
|
|
|
"errors"
|
2021-08-18 16:30:11 +08:00
|
|
|
|
"io"
|
2021-07-20 15:06:09 +08:00
|
|
|
|
|
2021-11-02 18:16:32 +08:00
|
|
|
|
"github.com/milvus-io/milvus/internal/common"
|
2021-07-20 15:06:09 +08:00
|
|
|
|
"github.com/milvus-io/milvus/internal/proto/etcdpb"
|
|
|
|
|
)
|
|
|
|
|
|
2021-09-17 14:49:50 +08:00
|
|
|
|
// VectorChunkManager is responsible for read and write vector data.
|
2021-07-20 15:06:09 +08:00
|
|
|
|
type VectorChunkManager struct {
|
|
|
|
|
localChunkManager ChunkManager
|
|
|
|
|
remoteChunkManager ChunkManager
|
2021-08-18 16:30:11 +08:00
|
|
|
|
|
|
|
|
|
schema *etcdpb.CollectionMeta
|
|
|
|
|
|
|
|
|
|
localCacheEnable bool
|
2021-07-20 15:06:09 +08:00
|
|
|
|
}
|
|
|
|
|
|
2022-02-24 23:53:53 +08:00
|
|
|
|
// Compile-time assertion that *VectorChunkManager implements ChunkManager.
var _ ChunkManager = (*VectorChunkManager)(nil)
|
|
|
|
|
|
2021-09-17 14:49:50 +08:00
|
|
|
|
// NewVectorChunkManager create a new vector manager object.
|
2021-08-18 16:30:11 +08:00
|
|
|
|
func NewVectorChunkManager(localChunkManager ChunkManager, remoteChunkManager ChunkManager, schema *etcdpb.CollectionMeta, localCacheEnable bool) *VectorChunkManager {
|
2021-07-20 15:06:09 +08:00
|
|
|
|
return &VectorChunkManager{
|
|
|
|
|
localChunkManager: localChunkManager,
|
|
|
|
|
remoteChunkManager: remoteChunkManager,
|
2021-08-18 16:30:11 +08:00
|
|
|
|
|
|
|
|
|
schema: schema,
|
|
|
|
|
localCacheEnable: localCacheEnable,
|
2021-07-20 15:06:09 +08:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2021-09-17 14:49:50 +08:00
|
|
|
|
// For vector data, we will download vector file from storage. And we will
|
|
|
|
|
// deserialize the file for it has binlog style. At last we store pure vector
|
|
|
|
|
// data to local storage as cache.
|
2022-02-24 23:53:53 +08:00
|
|
|
|
func (vcm *VectorChunkManager) downloadVectorFile(filePath string) ([]byte, error) {
|
|
|
|
|
if vcm.localChunkManager.Exist(filePath) {
|
|
|
|
|
return vcm.localChunkManager.Read(filePath)
|
2021-07-28 16:31:22 +08:00
|
|
|
|
}
|
2022-02-24 23:53:53 +08:00
|
|
|
|
content, err := vcm.remoteChunkManager.Read(filePath)
|
2021-07-20 15:06:09 +08:00
|
|
|
|
if err != nil {
|
2021-08-18 16:30:11 +08:00
|
|
|
|
return nil, err
|
2021-07-20 15:06:09 +08:00
|
|
|
|
}
|
2022-02-24 23:53:53 +08:00
|
|
|
|
insertCodec := NewInsertCodec(vcm.schema)
|
2021-07-20 15:06:09 +08:00
|
|
|
|
blob := &Blob{
|
2022-02-24 23:53:53 +08:00
|
|
|
|
Key: filePath,
|
2021-07-20 15:06:09 +08:00
|
|
|
|
Value: content,
|
|
|
|
|
}
|
|
|
|
|
|
2021-07-24 09:25:22 +08:00
|
|
|
|
_, _, data, err := insertCodec.Deserialize([]*Blob{blob})
|
2021-07-20 15:06:09 +08:00
|
|
|
|
if err != nil {
|
2021-08-18 16:30:11 +08:00
|
|
|
|
return nil, err
|
2021-07-20 15:06:09 +08:00
|
|
|
|
}
|
|
|
|
|
|
2021-08-18 16:30:11 +08:00
|
|
|
|
var results []byte
|
2021-07-20 15:06:09 +08:00
|
|
|
|
for _, singleData := range data.Data {
|
|
|
|
|
binaryVector, ok := singleData.(*BinaryVectorFieldData)
|
|
|
|
|
if ok {
|
2021-08-18 16:30:11 +08:00
|
|
|
|
results = binaryVector.Data
|
2021-07-20 15:06:09 +08:00
|
|
|
|
}
|
|
|
|
|
floatVector, ok := singleData.(*FloatVectorFieldData)
|
|
|
|
|
if ok {
|
2021-07-24 09:25:22 +08:00
|
|
|
|
buf := new(bytes.Buffer)
|
2021-11-02 18:16:32 +08:00
|
|
|
|
err := binary.Write(buf, common.Endian, floatVector.Data)
|
2021-07-24 09:25:22 +08:00
|
|
|
|
if err != nil {
|
2021-08-18 16:30:11 +08:00
|
|
|
|
return nil, err
|
2021-07-20 15:06:09 +08:00
|
|
|
|
}
|
2021-08-18 16:30:11 +08:00
|
|
|
|
results = buf.Bytes()
|
2021-07-20 15:06:09 +08:00
|
|
|
|
}
|
|
|
|
|
}
|
2021-08-18 16:30:11 +08:00
|
|
|
|
return results, nil
|
2021-07-24 09:25:22 +08:00
|
|
|
|
}
|
|
|
|
|
|
2021-09-17 14:49:50 +08:00
|
|
|
|
// GetPath returns the path of vector data. If cached, return local path.
|
|
|
|
|
// If not cached return remote path.
|
2022-02-24 23:53:53 +08:00
|
|
|
|
func (vcm *VectorChunkManager) GetPath(filePath string) (string, error) {
|
|
|
|
|
if vcm.localChunkManager.Exist(filePath) && vcm.localCacheEnable {
|
|
|
|
|
return vcm.localChunkManager.GetPath(filePath)
|
|
|
|
|
}
|
|
|
|
|
return vcm.remoteChunkManager.GetPath(filePath)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func (vcm *VectorChunkManager) GetSize(filePath string) (int64, error) {
|
|
|
|
|
if vcm.localChunkManager.Exist(filePath) && vcm.localCacheEnable {
|
|
|
|
|
return vcm.localChunkManager.GetSize(filePath)
|
2021-07-24 09:25:22 +08:00
|
|
|
|
}
|
2022-02-24 23:53:53 +08:00
|
|
|
|
return vcm.remoteChunkManager.GetSize(filePath)
|
2021-07-20 15:06:09 +08:00
|
|
|
|
}
|
|
|
|
|
|
2021-09-17 14:49:50 +08:00
|
|
|
|
// Write writes the vector data to local cache if cache enabled.
|
2022-02-24 23:53:53 +08:00
|
|
|
|
func (vcm *VectorChunkManager) Write(filePath string, content []byte) error {
|
2021-08-18 16:30:11 +08:00
|
|
|
|
if !vcm.localCacheEnable {
|
2022-02-24 23:53:53 +08:00
|
|
|
|
return errors.New("cannot write local file for local cache is not allowed")
|
2021-08-18 16:30:11 +08:00
|
|
|
|
}
|
2022-02-24 23:53:53 +08:00
|
|
|
|
return vcm.localChunkManager.Write(filePath, content)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// MultiWrite writes the vector data to local cache if cache enabled.
|
|
|
|
|
func (vcm *VectorChunkManager) MultiWrite(contents map[string][]byte) error {
|
|
|
|
|
if !vcm.localCacheEnable {
|
|
|
|
|
return errors.New("cannot write local file for local cache is not allowed")
|
|
|
|
|
}
|
|
|
|
|
return vcm.localChunkManager.MultiWrite(contents)
|
2021-07-20 15:06:09 +08:00
|
|
|
|
}
|
|
|
|
|
|
2021-09-17 14:49:50 +08:00
|
|
|
|
// Exist checks whether vector data is saved to local cache.
|
2022-02-24 23:53:53 +08:00
|
|
|
|
func (vcm *VectorChunkManager) Exist(filePath string) bool {
|
|
|
|
|
return vcm.localChunkManager.Exist(filePath)
|
2021-07-20 15:06:09 +08:00
|
|
|
|
}
|
|
|
|
|
|
2021-09-17 14:49:50 +08:00
|
|
|
|
// Read reads the pure vector data. If cached, it reads from local.
|
2022-02-24 23:53:53 +08:00
|
|
|
|
func (vcm *VectorChunkManager) Read(filePath string) ([]byte, error) {
|
2021-08-18 16:30:11 +08:00
|
|
|
|
if vcm.localCacheEnable {
|
2022-02-24 23:53:53 +08:00
|
|
|
|
if vcm.localChunkManager.Exist(filePath) {
|
|
|
|
|
return vcm.localChunkManager.Read(filePath)
|
|
|
|
|
}
|
|
|
|
|
contents, err := vcm.downloadVectorFile(filePath)
|
|
|
|
|
if err != nil {
|
|
|
|
|
return nil, err
|
2021-08-18 16:30:11 +08:00
|
|
|
|
}
|
2022-02-24 23:53:53 +08:00
|
|
|
|
err = vcm.localChunkManager.Write(filePath, contents)
|
2021-08-18 16:30:11 +08:00
|
|
|
|
if err != nil {
|
|
|
|
|
return nil, err
|
|
|
|
|
}
|
2022-02-24 23:53:53 +08:00
|
|
|
|
return vcm.localChunkManager.Read(filePath)
|
|
|
|
|
}
|
|
|
|
|
return vcm.downloadVectorFile(filePath)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// MultiRead reads the pure vector data. If cached, it reads from local.
|
|
|
|
|
func (vcm *VectorChunkManager) MultiRead(filePaths []string) ([][]byte, error) {
|
|
|
|
|
var results [][]byte
|
|
|
|
|
for _, filePath := range filePaths {
|
|
|
|
|
content, err := vcm.Read(filePath)
|
2021-08-18 16:30:11 +08:00
|
|
|
|
if err != nil {
|
|
|
|
|
return nil, err
|
|
|
|
|
}
|
2022-02-24 23:53:53 +08:00
|
|
|
|
results = append(results, content)
|
2021-07-20 15:06:09 +08:00
|
|
|
|
}
|
2022-02-24 23:53:53 +08:00
|
|
|
|
|
|
|
|
|
return results, nil
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func (vcm *VectorChunkManager) ReadWithPrefix(prefix string) ([]string, [][]byte, error) {
|
|
|
|
|
panic("has not implemented yet")
|
2021-07-20 15:06:09 +08:00
|
|
|
|
}
|
|
|
|
|
|
2021-09-17 14:49:50 +08:00
|
|
|
|
// ReadAt reads specific position data of vector. If cached, it reads from local.
|
2022-02-24 23:53:53 +08:00
|
|
|
|
func (vcm *VectorChunkManager) ReadAt(filePath string, off int64, length int64) ([]byte, error) {
|
2021-08-18 16:30:11 +08:00
|
|
|
|
if vcm.localCacheEnable {
|
2022-02-24 23:53:53 +08:00
|
|
|
|
if vcm.localChunkManager.Exist(filePath) {
|
|
|
|
|
return vcm.localChunkManager.ReadAt(filePath, off, length)
|
2021-08-18 16:30:11 +08:00
|
|
|
|
}
|
2022-02-24 23:53:53 +08:00
|
|
|
|
results, err := vcm.downloadVectorFile(filePath)
|
2021-08-18 16:30:11 +08:00
|
|
|
|
if err != nil {
|
2022-02-24 23:53:53 +08:00
|
|
|
|
return nil, err
|
2021-08-18 16:30:11 +08:00
|
|
|
|
}
|
2022-02-24 23:53:53 +08:00
|
|
|
|
err = vcm.localChunkManager.Write(filePath, results)
|
2021-08-18 16:30:11 +08:00
|
|
|
|
if err != nil {
|
2022-02-24 23:53:53 +08:00
|
|
|
|
return nil, err
|
2021-08-18 16:30:11 +08:00
|
|
|
|
}
|
2022-02-24 23:53:53 +08:00
|
|
|
|
return vcm.localChunkManager.ReadAt(filePath, off, length)
|
2021-08-18 16:30:11 +08:00
|
|
|
|
}
|
2022-02-24 23:53:53 +08:00
|
|
|
|
results, err := vcm.downloadVectorFile(filePath)
|
2021-08-18 16:30:11 +08:00
|
|
|
|
if err != nil {
|
2022-02-24 23:53:53 +08:00
|
|
|
|
return nil, err
|
2021-08-18 16:30:11 +08:00
|
|
|
|
}
|
|
|
|
|
|
2022-02-24 23:53:53 +08:00
|
|
|
|
if off < 0 || int64(len(results)) < off {
|
|
|
|
|
return nil, errors.New("vectorChunkManager: invalid offset")
|
2021-08-18 16:30:11 +08:00
|
|
|
|
}
|
|
|
|
|
|
2022-02-24 23:53:53 +08:00
|
|
|
|
p := make([]byte, length)
|
|
|
|
|
n := copy(p, results[off:])
|
2021-08-18 16:30:11 +08:00
|
|
|
|
if n < len(p) {
|
2022-02-24 23:53:53 +08:00
|
|
|
|
return nil, io.EOF
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return p, nil
|
|
|
|
|
}
|
|
|
|
|
func (vcm *VectorChunkManager) Remove(filePath string) error {
|
|
|
|
|
err := vcm.localChunkManager.Remove(filePath)
|
|
|
|
|
if err != nil {
|
|
|
|
|
return err
|
2021-08-18 16:30:11 +08:00
|
|
|
|
}
|
2022-02-24 23:53:53 +08:00
|
|
|
|
err = vcm.remoteChunkManager.Remove(filePath)
|
|
|
|
|
if err != nil {
|
|
|
|
|
return err
|
|
|
|
|
}
|
|
|
|
|
return nil
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func (vcm *VectorChunkManager) MultiRemove(filePaths []string) error {
|
|
|
|
|
err := vcm.localChunkManager.MultiRemove(filePaths)
|
|
|
|
|
if err != nil {
|
|
|
|
|
return err
|
|
|
|
|
}
|
|
|
|
|
err = vcm.remoteChunkManager.MultiRemove(filePaths)
|
|
|
|
|
if err != nil {
|
|
|
|
|
return err
|
|
|
|
|
}
|
|
|
|
|
return nil
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func (vcm *VectorChunkManager) RemoveWithPrefix(prefix string) error {
|
|
|
|
|
err := vcm.localChunkManager.RemoveWithPrefix(prefix)
|
|
|
|
|
if err != nil {
|
|
|
|
|
return err
|
|
|
|
|
}
|
|
|
|
|
err = vcm.remoteChunkManager.RemoveWithPrefix(prefix)
|
|
|
|
|
if err != nil {
|
|
|
|
|
return err
|
|
|
|
|
}
|
|
|
|
|
return nil
|
|
|
|
|
}
|
2021-08-18 16:30:11 +08:00
|
|
|
|
|
2022-02-24 23:53:53 +08:00
|
|
|
|
func (vcm *VectorChunkManager) Close() error {
|
|
|
|
|
// TODO:Replace the cache with the local chunk manager and clear the cache when closed
|
|
|
|
|
return vcm.localChunkManager.RemoveWithPrefix("")
|
2021-07-20 15:06:09 +08:00
|
|
|
|
}
|