mirror of
https://gitee.com/milvus-io/milvus.git
synced 2024-12-04 04:49:08 +08:00
fix: centroids file not removed when data skew in major compaction (#34359)
issue: https://github.com/milvus-io/milvus/issues/30633 pr: #34050 Signed-off-by: chasingegg <chao.gao@zilliz.com>
This commit is contained in:
parent
74da97796b
commit
261b61e875
@ -326,6 +326,8 @@ KmeansClustering::StreamingAssignandUpload(
|
|||||||
if (IsDataSkew<T>(config, dim, num_vectors_each_centroid)) {
|
if (IsDataSkew<T>(config, dim, num_vectors_each_centroid)) {
|
||||||
LOG_INFO(msg_header_ + "data skew! skip clustering");
|
LOG_INFO(msg_header_ + "data skew! skip clustering");
|
||||||
// remove uploaded files
|
// remove uploaded files
|
||||||
|
remote_paths_to_size[cluster_result_.centroid_path] =
|
||||||
|
cluster_result_.centroid_file_size;
|
||||||
RemoveClusteringResultFiles(file_manager_->GetChunkManager().get(),
|
RemoveClusteringResultFiles(file_manager_->GetChunkManager().get(),
|
||||||
remote_paths_to_size);
|
remote_paths_to_size);
|
||||||
// skip clustering, nothing takes affect
|
// skip clustering, nothing takes affect
|
||||||
|
@ -78,10 +78,35 @@ transforConfigToPB(const Config& config) {
|
|||||||
return analyze_info;
|
return analyze_info;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// when we skip clustering, nothing uploaded
|
||||||
|
template <typename T>
|
||||||
|
void
|
||||||
|
CheckResultEmpty(const milvus::clustering::KmeansClusteringPtr& clusteringJob,
|
||||||
|
const milvus::storage::ChunkManagerPtr cm,
|
||||||
|
int64_t segment_id,
|
||||||
|
int64_t segment_id2) {
|
||||||
|
std::string centroids_path_prefix =
|
||||||
|
clusteringJob->GetRemoteCentroidsObjectPrefix();
|
||||||
|
std::string centroid_path =
|
||||||
|
centroids_path_prefix + "/" + std::string(CENTROIDS_NAME);
|
||||||
|
ASSERT_FALSE(cm->Exist(centroid_path));
|
||||||
|
std::string offset_mapping_name = std::string(OFFSET_MAPPING_NAME);
|
||||||
|
std::string centroid_id_mapping_path =
|
||||||
|
clusteringJob->GetRemoteCentroidIdMappingObjectPrefix(segment_id) +
|
||||||
|
"/" + offset_mapping_name;
|
||||||
|
milvus::proto::clustering::ClusteringCentroidIdMappingStats mapping_stats;
|
||||||
|
std::string centroid_id_mapping_path2 =
|
||||||
|
clusteringJob->GetRemoteCentroidIdMappingObjectPrefix(segment_id2) +
|
||||||
|
"/" + offset_mapping_name;
|
||||||
|
ASSERT_FALSE(cm->Exist(centroid_id_mapping_path));
|
||||||
|
ASSERT_FALSE(cm->Exist(centroid_id_mapping_path2));
|
||||||
|
}
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void
|
void
|
||||||
CheckResultCorrectness(
|
CheckResultCorrectness(
|
||||||
const milvus::clustering::KmeansClusteringPtr& clusteringJob,
|
const milvus::clustering::KmeansClusteringPtr& clusteringJob,
|
||||||
|
const milvus::storage::ChunkManagerPtr cm,
|
||||||
int64_t segment_id,
|
int64_t segment_id,
|
||||||
int64_t segment_id2,
|
int64_t segment_id2,
|
||||||
int64_t dim,
|
int64_t dim,
|
||||||
@ -137,6 +162,10 @@ CheckResultCorrectness(
|
|||||||
ASSERT_EQ(mapping_stats2.num_in_centroid(i), num_in_centroid[i]);
|
ASSERT_EQ(mapping_stats2.num_in_centroid(i), num_in_centroid[i]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
// remove files
|
||||||
|
cm->Remove(centroid_path);
|
||||||
|
cm->Remove(centroid_id_mapping_path);
|
||||||
|
cm->Remove(centroid_id_mapping_path2);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T, DataType dtype>
|
template <typename T, DataType dtype>
|
||||||
@ -196,7 +225,7 @@ test_run() {
|
|||||||
Config config;
|
Config config;
|
||||||
config["max_cluster_ratio"] = 10.0;
|
config["max_cluster_ratio"] = 10.0;
|
||||||
config["max_cluster_size"] = 5L * 1024 * 1024 * 1024;
|
config["max_cluster_size"] = 5L * 1024 * 1024 * 1024;
|
||||||
|
auto clusteringJob = std::make_unique<clustering::KmeansClustering>(ctx);
|
||||||
// no need to sample train data
|
// no need to sample train data
|
||||||
{
|
{
|
||||||
config["min_cluster_ratio"] = 0.01;
|
config["min_cluster_ratio"] = 0.01;
|
||||||
@ -205,10 +234,9 @@ test_run() {
|
|||||||
config["train_size"] = 25L * 1024 * 1024 * 1024; // 25GB
|
config["train_size"] = 25L * 1024 * 1024 * 1024; // 25GB
|
||||||
config["dim"] = dim;
|
config["dim"] = dim;
|
||||||
config["num_rows"] = num_rows;
|
config["num_rows"] = num_rows;
|
||||||
auto clusteringJob =
|
|
||||||
std::make_unique<clustering::KmeansClustering>(ctx);
|
|
||||||
clusteringJob->Run<T>(transforConfigToPB(config));
|
clusteringJob->Run<T>(transforConfigToPB(config));
|
||||||
CheckResultCorrectness<T>(clusteringJob,
|
CheckResultCorrectness<T>(clusteringJob,
|
||||||
|
cm,
|
||||||
segment_id,
|
segment_id,
|
||||||
segment_id2,
|
segment_id2,
|
||||||
dim,
|
dim,
|
||||||
@ -223,10 +251,9 @@ test_run() {
|
|||||||
config["train_size"] = 25L * 1024 * 1024 * 1024; // 25GB
|
config["train_size"] = 25L * 1024 * 1024 * 1024; // 25GB
|
||||||
config["dim"] = dim;
|
config["dim"] = dim;
|
||||||
config["num_rows"] = num_rows;
|
config["num_rows"] = num_rows;
|
||||||
auto clusteringJob =
|
|
||||||
std::make_unique<clustering::KmeansClustering>(ctx);
|
|
||||||
clusteringJob->Run<T>(transforConfigToPB(config));
|
clusteringJob->Run<T>(transforConfigToPB(config));
|
||||||
CheckResultCorrectness<T>(clusteringJob,
|
CheckResultCorrectness<T>(clusteringJob,
|
||||||
|
cm,
|
||||||
segment_id,
|
segment_id,
|
||||||
segment_id2,
|
segment_id2,
|
||||||
dim,
|
dim,
|
||||||
@ -244,11 +271,10 @@ test_run() {
|
|||||||
config["train_size"] = 25L * 1024 * 1024 * 1024; // 25GB
|
config["train_size"] = 25L * 1024 * 1024 * 1024; // 25GB
|
||||||
config["dim"] = dim;
|
config["dim"] = dim;
|
||||||
config["num_rows"] = num_rows;
|
config["num_rows"] = num_rows;
|
||||||
auto clusteringJob =
|
|
||||||
std::make_unique<clustering::KmeansClustering>(ctx);
|
|
||||||
clusteringJob->Run<T>(transforConfigToPB(config));
|
clusteringJob->Run<T>(transforConfigToPB(config));
|
||||||
} catch (SegcoreError& e) {
|
} catch (SegcoreError& e) {
|
||||||
ASSERT_EQ(e.get_error_code(), ErrorCode::ClusterSkip);
|
ASSERT_EQ(e.get_error_code(), ErrorCode::ClusterSkip);
|
||||||
|
CheckResultEmpty<T>(clusteringJob, cm, segment_id, segment_id2);
|
||||||
throw e;
|
throw e;
|
||||||
},
|
},
|
||||||
SegcoreError);
|
SegcoreError);
|
||||||
@ -264,11 +290,10 @@ test_run() {
|
|||||||
config["train_size"] = 25L * 1024 * 1024 * 1024; // 25GB
|
config["train_size"] = 25L * 1024 * 1024 * 1024; // 25GB
|
||||||
config["dim"] = dim;
|
config["dim"] = dim;
|
||||||
config["num_rows"] = num_rows;
|
config["num_rows"] = num_rows;
|
||||||
auto clusteringJob =
|
|
||||||
std::make_unique<clustering::KmeansClustering>(ctx);
|
|
||||||
clusteringJob->Run<T>(transforConfigToPB(config));
|
clusteringJob->Run<T>(transforConfigToPB(config));
|
||||||
} catch (SegcoreError& e) {
|
} catch (SegcoreError& e) {
|
||||||
ASSERT_EQ(e.get_error_code(), ErrorCode::ClusterSkip);
|
ASSERT_EQ(e.get_error_code(), ErrorCode::ClusterSkip);
|
||||||
|
CheckResultEmpty<T>(clusteringJob, cm, segment_id, segment_id2);
|
||||||
throw e;
|
throw e;
|
||||||
},
|
},
|
||||||
SegcoreError);
|
SegcoreError);
|
||||||
@ -282,11 +307,9 @@ test_run() {
|
|||||||
config["train_size"] = 1536L * 1024; // 1.5MB
|
config["train_size"] = 1536L * 1024; // 1.5MB
|
||||||
config["dim"] = dim;
|
config["dim"] = dim;
|
||||||
config["num_rows"] = num_rows;
|
config["num_rows"] = num_rows;
|
||||||
auto clusteringJob =
|
|
||||||
std::make_unique<clustering::KmeansClustering>(ctx);
|
|
||||||
|
|
||||||
clusteringJob->Run<T>(transforConfigToPB(config));
|
clusteringJob->Run<T>(transforConfigToPB(config));
|
||||||
CheckResultCorrectness<T>(clusteringJob,
|
CheckResultCorrectness<T>(clusteringJob,
|
||||||
|
cm,
|
||||||
segment_id,
|
segment_id,
|
||||||
segment_id2,
|
segment_id2,
|
||||||
dim,
|
dim,
|
||||||
@ -302,11 +325,9 @@ test_run() {
|
|||||||
config["train_size"] = 6L * 1024 * 1024; // 6MB
|
config["train_size"] = 6L * 1024 * 1024; // 6MB
|
||||||
config["dim"] = dim;
|
config["dim"] = dim;
|
||||||
config["num_rows"] = num_rows;
|
config["num_rows"] = num_rows;
|
||||||
auto clusteringJob =
|
|
||||||
std::make_unique<clustering::KmeansClustering>(ctx);
|
|
||||||
|
|
||||||
clusteringJob->Run<T>(transforConfigToPB(config));
|
clusteringJob->Run<T>(transforConfigToPB(config));
|
||||||
CheckResultCorrectness<T>(clusteringJob,
|
CheckResultCorrectness<T>(clusteringJob,
|
||||||
|
cm,
|
||||||
segment_id,
|
segment_id,
|
||||||
segment_id2,
|
segment_id2,
|
||||||
dim,
|
dim,
|
||||||
|
Loading…
Reference in New Issue
Block a user