Ignore case when comparing metric types in segcore (#19437)

Signed-off-by: yudong.cai <yudong.cai@zilliz.com>

Signed-off-by: yudong.cai <yudong.cai@zilliz.com>
This commit is contained in:
Cai Yudong 2022-09-26 17:58:52 +08:00 committed by GitHub
parent fc8061bb86
commit 87d78a4a85
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 80 additions and 61 deletions

View File

@ -11,12 +11,14 @@
#pragma once
#include <string>
#include "exceptions/EasyAssert.h"
#include "config/ConfigChunkManager.h"
#include "common/Consts.h"
#include <google/protobuf/text_format.h>
#include <string>
#include "common/Consts.h"
#include "config/ConfigChunkManager.h"
#include "exceptions/EasyAssert.h"
#include "knowhere/index/vector_index/adapter/VectorAdapter.h"
#include "knowhere/index/vector_index/helpers/IndexParameter.h"
namespace milvus {
@ -96,4 +98,14 @@ upper_div(int64_t value, int64_t align) {
return groups;
}
// Case-insensitive check of whether `str` names the metric `metric_type`.
// Both arguments are passed to strcasecmp() as C strings; the result is true
// exactly when strcasecmp reports the two as equal (return value 0).
// NOTE(review): strcasecmp is the POSIX routine declared in <strings.h>; confirm
// that header is reachable (directly or transitively) from this file's includes.
inline bool
IsMetricType(const std::string& str, const knowhere::MetricType& metric_type) {
    const int cmp = strcasecmp(str.c_str(), metric_type.c_str());
    return cmp == 0;
}
// Reports whether `metric_type` is the inner-product metric, i.e. whether
// IsMetricType() matches it against knowhere::metric::IP.
inline bool
PositivelyRelated(const knowhere::MetricType& metric_type) {
    const bool is_inner_product = IsMetricType(metric_type, knowhere::metric::IP);
    return is_inner_product;
}
} // namespace milvus

View File

@ -22,7 +22,7 @@ SubSearchResult::merge_impl(const SubSearchResult& right) {
AssertInfo(num_queries_ == right.num_queries_, "[SubSearchResult]Nq check failed");
AssertInfo(topk_ == right.topk_, "[SubSearchResult]Topk check failed");
AssertInfo(metric_type_ == right.metric_type_, "[SubSearchResult]Metric type check failed");
AssertInfo(is_desc == is_descending(metric_type_), "[SubSearchResult]Metric type isn't desc");
AssertInfo(is_desc == PositivelyRelated(metric_type_), "[SubSearchResult]Metric type isn't desc");
for (int64_t qn = 0; qn < num_queries_; ++qn) {
auto offset = qn * topk_;
@ -61,7 +61,7 @@ SubSearchResult::merge_impl(const SubSearchResult& right) {
void
SubSearchResult::merge(const SubSearchResult& sub_result) {
AssertInfo(metric_type_ == sub_result.metric_type_, "[SubSearchResult]Metric type check failed when merge");
if (is_descending(metric_type_)) {
if (PositivelyRelated(metric_type_)) {
this->merge_impl<true>(sub_result);
} else {
this->merge_impl<false>(sub_result);

View File

@ -14,7 +14,9 @@
#include <limits>
#include <utility>
#include <vector>
#include "common/Types.h"
#include "common/Utils.h"
namespace milvus::query {
@ -41,17 +43,7 @@ class SubSearchResult {
public:
static float
init_value(const MetricType& metric_type) {
return (is_descending(metric_type) ? -1 : 1) * std::numeric_limits<float>::max();
}
static bool
is_descending(const MetricType& metric_type) {
// TODO(dog): more types
if (metric_type == knowhere::metric::IP) {
return true;
} else {
return false;
}
return (PositivelyRelated(metric_type) ? -1 : 1) * std::numeric_limits<float>::max();
}
public:

View File

@ -1,21 +0,0 @@
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software distributed under the License
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations under the License.
#pragma once
#include "common/Types.h"
namespace milvus::segcore {
// True only when `metric_type` compares equal (via operator==) to
// knowhere::metric::IP; every other metric type yields false.
static inline bool
PositivelyRelated(const MetricType& metric_type) {
    const bool is_inner_product = (metric_type == knowhere::metric::IP);
    return is_inner_product;
}
} // namespace milvus::segcore

View File

@ -18,7 +18,6 @@
#include "segcore/Collection.h"
#include "segcore/SegmentGrowingImpl.h"
#include "segcore/SegmentSealedImpl.h"
#include "segcore/SimilarityCorelation.h"
#include "segcore/segment_c.h"
#include "index/IndexInfo.h"
#include "google/protobuf/text_format.h"
@ -72,7 +71,7 @@ Search(CSegmentInterface c_segment,
auto plan = (milvus::query::Plan*)c_plan;
auto phg_ptr = reinterpret_cast<const milvus::query::PlaceholderGroup*>(c_placeholder_group);
auto search_result = segment->Search(plan, phg_ptr, timestamp);
if (!milvus::segcore::PositivelyRelated(plan->plan_node_->search_info_.metric_type_)) {
if (!milvus::PositivelyRelated(plan->plan_node_->search_info_.metric_type_)) {
for (auto& dis : search_result->distances_) {
dis *= -1;
}

View File

@ -11,7 +11,9 @@
#include <gtest/gtest.h>
#include <random>
#include <knowhere/index/vector_index/helpers/IndexParameter.h>
#include "common/Utils.h"
#include "knowhere/index/vector_index/helpers/IndexParameter.h"
#include "query/SearchBruteForce.h"
#include "test_utils/Distance.h"
@ -38,13 +40,13 @@ Distances(const float* base,
int nb,
int dim,
const knowhere::MetricType& metric) {
if (metric == knowhere::metric::L2) {
if (milvus::IsMetricType(metric, knowhere::metric::L2)) {
std::vector<std::tuple<int, float>> res;
for (int i = 0; i < nb; i++) {
res.emplace_back(i, L2(base + i * dim, query, dim));
}
return res;
} else if (metric == knowhere::metric::IP) {
} else if (milvus::IsMetricType(metric, knowhere::metric::IP)) {
std::vector<std::tuple<int, float>> res;
for (int i = 0; i < nb; i++) {
res.emplace_back(i, IP(base + i * dim, query, dim));
@ -75,8 +77,9 @@ Ref(const float* base,
const knowhere::MetricType& metric) {
auto res = Distances(base, query, nb, dim, metric);
std::sort(res.begin(), res.end());
if (metric == knowhere::metric::L2) {
} else if (metric == knowhere::metric::IP) {
if (milvus::IsMetricType(metric, knowhere::metric::L2)) {
// do nothing
} else if (milvus::IsMetricType(metric, knowhere::metric::IP)) {
std::reverse(res.begin(), res.end());
} else {
PanicInfo("invalid metric type");
@ -95,8 +98,8 @@ AssertMatch(const std::vector<int>& ref, const int64_t* ans) {
}
bool
is_supported_float_metric(const knowhere::MetricType& metric) {
return metric == knowhere::metric::L2 || metric == knowhere::metric::IP;
is_supported_float_metric(const std::string& metric) {
return milvus::IsMetricType(metric, knowhere::metric::L2) || milvus::IsMetricType(metric, knowhere::metric::IP);
}
} // namespace
@ -127,11 +130,13 @@ class TestFloatSearchBruteForce : public ::testing::Test {
};
// Calls the fixture's Run(100, 10, 5, 128, ...) with the L2 metric given three ways:
// the knowhere::metric::L2 constant and the string literals "L2" and "l2".
// NOTE(review): this span looks like it comes from a rendered diff, so the first Run
// call may be the line that the two literal-string calls replaced — confirm against
// the actual test file.
TEST_F(TestFloatSearchBruteForce, L2) {
Run(100, 10, 5, 128, knowhere::metric::L2);
Run(100, 10, 5, 128, "L2");
Run(100, 10, 5, 128, "l2");
}
// Calls the fixture's Run(100, 10, 5, 128, ...) with the IP metric given three ways:
// the knowhere::metric::IP constant and the string literals "IP" and "ip".
// NOTE(review): this span looks like it comes from a rendered diff, so the first Run
// call may be the line that the two literal-string calls replaced — confirm against
// the actual test file.
TEST_F(TestFloatSearchBruteForce, IP) {
Run(100, 10, 5, 128, knowhere::metric::IP);
Run(100, 10, 5, 128, "IP");
Run(100, 10, 5, 128, "ip");
}
TEST_F(TestFloatSearchBruteForce, NotSupported) {

View File

@ -19,6 +19,7 @@
#include "query/generated/ExprVisitor.h"
#include "query/generated/ShowPlanNodeVisitor.h"
#include "segcore/SegmentSealed.h"
#include "test_utils/AssertUtils.h"
#include "test_utils/DataGen.h"
using namespace milvus;
@ -504,7 +505,7 @@ TEST(Query, ExecWithoutPredicate) {
{
"vector": {
"fakevec": {
"metric_type": "L2",
"metric_type": "l2",
"params": {
"nprobe": 10
},
@ -530,6 +531,7 @@ TEST(Query, ExecWithoutPredicate) {
Timestamp time = 1000000;
auto sr = segment->Search(plan.get(), ph_group.get(), time);
assert_order(*sr, "l2");
std::vector<std::vector<std::string>> results;
int topk = 5;
auto json = SearchResultToJson(*sr);
@ -572,7 +574,7 @@ TEST(Indexing, InnerProduct) {
{
"vector": {
"normalized": {
"metric_type": "IP",
"metric_type": "ip",
"params": {
"nprobe": 10
},
@ -599,6 +601,7 @@ TEST(Indexing, InnerProduct) {
auto ph_group = ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());
Timestamp ts = N * 2;
auto sr = segment->Search(plan.get(), ph_group.get(), ts);
assert_order(*sr, "ip");
std::cout << SearchResultToJson(*sr).dump(2);
}

View File

@ -11,15 +11,15 @@
#include <gtest/gtest.h>
#include "segcore/SimilarityCorelation.h"
#include "common/Utils.h"
// Checks PositivelyRelated() against the knowhere metric constants: it is expected to
// be true for IP and false for L2, HAMMING, JACCARD, TANIMOTO, SUBSTRUCTURE and
// SUPERSTRUCTURE.
// NOTE(review): both the milvus::segcore:: and the milvus:: spellings of the call appear
// below; this looks like removed and added diff lines shown together — confirm which
// set is live in the actual test file.
TEST(SimilarityCorelation, Naive) {
ASSERT_TRUE(milvus::segcore::PositivelyRelated(knowhere::metric::IP));
ASSERT_TRUE(milvus::PositivelyRelated(knowhere::metric::IP));
ASSERT_FALSE(milvus::segcore::PositivelyRelated(knowhere::metric::L2));
ASSERT_FALSE(milvus::segcore::PositivelyRelated(knowhere::metric::HAMMING));
ASSERT_FALSE(milvus::segcore::PositivelyRelated(knowhere::metric::JACCARD));
ASSERT_FALSE(milvus::segcore::PositivelyRelated(knowhere::metric::TANIMOTO));
ASSERT_FALSE(milvus::segcore::PositivelyRelated(knowhere::metric::SUBSTRUCTURE));
ASSERT_FALSE(milvus::segcore::PositivelyRelated(knowhere::metric::SUPERSTRUCTURE));
ASSERT_FALSE(milvus::PositivelyRelated(knowhere::metric::L2));
ASSERT_FALSE(milvus::PositivelyRelated(knowhere::metric::HAMMING));
ASSERT_FALSE(milvus::PositivelyRelated(knowhere::metric::JACCARD));
ASSERT_FALSE(milvus::PositivelyRelated(knowhere::metric::TANIMOTO));
ASSERT_FALSE(milvus::PositivelyRelated(knowhere::metric::SUBSTRUCTURE));
ASSERT_FALSE(milvus::PositivelyRelated(knowhere::metric::SUPERSTRUCTURE));
}

View File

@ -25,6 +25,7 @@ compare_float(float x, float y, float epsilon = 0.000001f) {
return true;
return false;
}
bool
compare_double(double x, double y, double epsilon = 0.000001f) {
if (fabs(x - y) < epsilon)
@ -32,6 +33,34 @@ compare_double(double x, double y, double epsilon = 0.000001f) {
return false;
}
// Test helper: checks that the distances of every query in `result` are ordered.
// Distances are indexed as query * unity_topK_ + position. When
// milvus::PositivelyRelated(metric_type) is true, each distance must be <= the one
// before it; otherwise each distance must be >= the one before it.
// Positions whose seg offset equals -1 are not checked.
// The gtest ASSERT_* macros return from this helper on the first violation.
inline void
assert_order(const milvus::SearchResult& result, const knowhere::MetricType& metric_type) {
    const bool descending = milvus::PositivelyRelated(metric_type);
    auto& offsets = result.seg_offsets_;
    auto& dist = result.distances_;
    auto num_queries = result.total_nq_;
    auto per_query = result.unity_topK_;
    for (int q = 0; q < num_queries; q++) {
        // Start at position 1 so every checked entry has a predecessor in its own query.
        for (int k = 1; k < per_query; k++) {
            auto idx = q * per_query + k;
            if (offsets[idx] == -1) {
                continue;
            }
            if (descending) {
                ASSERT_GE(dist[idx - 1], dist[idx]);
            } else {
                ASSERT_LE(dist[idx - 1], dist[idx]);
            }
        }
    }
}
template <typename T>
inline void
assert_in(ScalarIndex<T>* index, const std::vector<T>& arr) {

View File

@ -240,9 +240,9 @@ CountDistance(
if (point_a == nullptr || point_b == nullptr) {
return std::numeric_limits<float>::max();
}
if (metric == knowhere::metric::L2) {
if (milvus::IsMetricType(metric, knowhere::metric::L2)) {
return L2(static_cast<const float*>(point_a), static_cast<const float*>(point_b), dim);
} else if (metric == knowhere::metric::JACCARD) {
} else if (milvus::IsMetricType(metric, knowhere::metric::JACCARD)) {
return Jaccard(static_cast<const uint8_t*>(point_a), static_cast<const uint8_t*>(point_b), dim);
} else {
return std::numeric_limits<float>::max();