feat: put inverted index into ram (#35222) (#35223)

fix: https://github.com/milvus-io/milvus/issues/35224
pr: https://github.com/milvus-io/milvus/pull/35222

---------

Signed-off-by: longjiquan <jiquan.long@zilliz.com>
This commit is contained in:
Jiquan Long 2024-08-06 10:08:16 +08:00 committed by GitHub
parent 1b304e982a
commit e598233b6a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 28 additions and 3 deletions

View File

@ -79,6 +79,10 @@ type component interface {
Stop() error
}
const (
// TmpInvertedIndexPrefix is the local directory handed to cleanLocalDir
// by runQueryNode and runIndexNode before the component is started.
// NOTE(review): the value mirrors TMP_INVERTED_INDEX_PREFIX on the C++
// side (InvertedIndexTantivy); keep the two strings in sync.
TmpInvertedIndexPrefix = "/tmp/milvus/inverted-index/"
)
func cleanLocalDir(path string) {
_, statErr := os.Stat(path)
// path exist, but stat error
@ -200,6 +204,7 @@ func (mr *MilvusRoles) runQueryNode(ctx context.Context, localMsg bool, wg *sync
if len(mmapDir) > 0 {
cleanLocalDir(mmapDir)
}
cleanLocalDir(TmpInvertedIndexPrefix)
return runComponent(ctx, localMsg, wg, components.NewQueryNode, metrics.RegisterQueryNode)
}
@ -224,6 +229,7 @@ func (mr *MilvusRoles) runIndexNode(ctx context.Context, localMsg bool, wg *sync
rootPath := paramtable.Get().LocalStorageCfg.Path.GetValue()
indexDataLocalPath := filepath.Join(rootPath, typeutil.IndexNodeRole)
cleanLocalDir(indexDataLocalPath)
cleanLocalDir(TmpInvertedIndexPrefix)
return runComponent(ctx, localMsg, wg, components.NewIndexNode, metrics.RegisterIndexNode)
}

View File

@ -23,6 +23,8 @@
#include "InvertedIndexTantivy.h"
namespace milvus::index {
// Root directory under which InvertedIndexTantivy places its index files:
// the constructor sets path_ to this prefix followed by the index identifier
// obtained from the disk file manager.
// NOTE(review): the Go roles code cleans a directory with the same literal
// value (TmpInvertedIndexPrefix); keep the two strings in sync.
constexpr const char* TMP_INVERTED_INDEX_PREFIX = "/tmp/milvus/inverted-index/";
inline TantivyDataType
get_tantivy_data_type(proto::schema::DataType data_type) {
switch (data_type) {
@ -71,8 +73,8 @@ InvertedIndexTantivy<T>::InvertedIndexTantivy(
disk_file_manager_ = std::make_shared<DiskFileManager>(ctx, ctx.space_);
auto field =
std::to_string(disk_file_manager_->GetFieldDataMeta().field_id);
auto prefix = disk_file_manager_->GetLocalIndexObjectPrefix();
path_ = prefix;
auto prefix = disk_file_manager_->GetIndexIdentifier();
path_ = std::string(TMP_INVERTED_INDEX_PREFIX) + prefix;
boost::filesystem::create_directories(path_);
d_type_ = get_tantivy_data_type(schema_);
if (tantivy_index_exist(path_.c_str())) {

View File

@ -864,6 +864,12 @@ DiskFileManagerImpl::GetLocalIndexObjectPrefix() {
local_chunk_manager, index_meta_.build_id, index_meta_.index_version);
}
// Returns the "<build_id>/<index_version>/" identifier for the index
// described by index_meta_. No root path is prepended.
std::string
DiskFileManagerImpl::GetIndexIdentifier() {
    const auto build_id = index_meta_.build_id;
    const auto index_version = index_meta_.index_version;
    return GenIndexPathIdentifier(build_id, index_version);
}
std::string
DiskFileManagerImpl::GetLocalRawDataObjectPrefix() {
auto local_chunk_manager =

View File

@ -61,6 +61,9 @@ class DiskFileManagerImpl : public FileManagerImpl {
std::string
GetLocalIndexObjectPrefix();
// Returns the index path identifier built from this manager's index meta
// (build id and index version), without any root-path prefix.
std::string
GetIndexIdentifier();
std::string
GetLocalRawDataObjectPrefix();

View File

@ -438,12 +438,17 @@ GetDimensionFromArrowArray(std::shared_ptr<arrow::Array> data,
}
}
// Produces the relative path fragment "<build_id>/<index_version>/" that
// identifies one version of one index build. The trailing slash is included.
std::string
GenIndexPathIdentifier(int64_t build_id, int64_t index_version) {
    std::string identifier = std::to_string(build_id);
    identifier += "/";
    identifier += std::to_string(index_version);
    identifier += "/";
    return identifier;
}
// Builds the directory prefix for an index under the chunk manager's root:
//   <cm root path>/<INDEX_ROOT_PATH>/<build_id>/<index_version>/
//
// cm            chunk manager whose GetRootPath() supplies the root.
// build_id      index build id.
// index_version version of that build.
//
// The trailing "<build_id>/<index_version>/" part is produced by
// GenIndexPathIdentifier so the two helpers cannot drift apart.
std::string
GenIndexPathPrefix(ChunkManagerPtr cm,
                   int64_t build_id,
                   int64_t index_version) {
    return cm->GetRootPath() + "/" + std::string(INDEX_ROOT_PATH) + "/" +
           GenIndexPathIdentifier(build_id, index_version);
}
std::string

View File

@ -74,6 +74,9 @@ GetDimensionFromArrowArray(std::shared_ptr<arrow::Array> array,
std::string
GetIndexPathPrefixWithBuildID(ChunkManagerPtr cm, int64_t build_id);
// Returns "<build_id>/<index_version>/" (trailing slash included), the
// root-independent part of an index path.
std::string
GenIndexPathIdentifier(int64_t build_id, int64_t index_version);
std::string
GenIndexPathPrefix(ChunkManagerPtr cm, int64_t build_id, int64_t index_version);