mirror of https://gitee.com/milvus-io/milvus.git (synced 2024-12-01 19:39:21 +08:00)

Add cache for thirdparty files cache

Signed-off-by: XuanYang-cn <xuan.yang@zilliz.com>

parent 6fddb992f4
commit e6f726e73a
@@ -35,10 +35,6 @@ message( STATUS "Build version = ${MILVUS_VERSION}" )
 get_last_commit_id( LAST_COMMIT_ID )
 message( STATUS "LAST_COMMIT_ID = ${LAST_COMMIT_ID}" )
 
-#configure_file( ${CMAKE_CURRENT_SOURCE_DIR}/src/version.h.in
-# ${CMAKE_CURRENT_SOURCE_DIR}/src/version.h @ONLY )
-
-# unset(CMAKE_EXPORT_COMPILE_COMMANDS CACHE)
 set( CMAKE_EXPORT_COMPILE_COMMANDS ON )
 
 # **************************** Project ****************************
@@ -1,8 +1,9 @@
-set(COMMON_SRC
-    Schema.cpp
-    )
+set(COMMON_SRC
+    Schema.cpp
+    Types.cpp
+    )
 
-add_library(milvus_common
-    ${COMMON_SRC}
-    )
+add_library(milvus_common
+    ${COMMON_SRC}
+    )
+target_link_libraries(milvus_common milvus_proto)
@@ -10,18 +10,13 @@
 // or implied. See the License for the specific language governing permissions and limitations under the License
 
 #pragma once
-#include "utils/Types.h"
+#include "common/Types.h"
 #include "utils/Status.h"
 #include "utils/EasyAssert.h"
 
 #include <string>
 #include <stdexcept>
 
 namespace milvus {
 
-using Timestamp = uint64_t; // TODO: use TiKV-like timestamp
-using engine::DataType;
-using engine::FieldElementType;
-
 inline int
 field_sizeof(DataType data_type, int dim = 1) {
     switch (data_type) {
@@ -89,7 +84,13 @@ field_is_vector(DataType datatype) {
 
 struct FieldMeta {
  public:
-    FieldMeta(std::string_view name, DataType type, int dim = 1) : name_(name), type_(type), dim_(dim) {
+    FieldMeta(std::string_view name, DataType type) : name_(name), type_(type) {
+        Assert(!is_vector());
     }
 
+    FieldMeta(std::string_view name, DataType type, int64_t dim, MetricType metric_type)
+        : name_(name), type_(type), vector_info_(VectorInfo{dim, metric_type}) {
+        Assert(is_vector());
+    }
+
     bool
@@ -98,14 +99,11 @@ struct FieldMeta {
         return type_ == DataType::VECTOR_BINARY || type_ == DataType::VECTOR_FLOAT;
     }
 
-    void
-    set_dim(int dim) {
-        dim_ = dim;
-    }
-
-    int
+    int64_t
     get_dim() const {
-        return dim_;
+        Assert(is_vector());
+        Assert(vector_info_.has_value());
+        return vector_info_->dim_;
     }
 
     const std::string&
@@ -120,12 +118,20 @@ struct FieldMeta {
 
     int
     get_sizeof() const {
-        return field_sizeof(type_, dim_);
+        if (is_vector()) {
+            return field_sizeof(type_, get_dim());
+        } else {
+            return field_sizeof(type_, 1);
+        }
     }
 
  private:
+    struct VectorInfo {
+        int64_t dim_;
+        MetricType metric_type_;
+    };
     std::string name_;
     DataType type_ = DataType::NONE;
-    int dim_ = 1;
+    std::optional<VectorInfo> vector_info_;
 };
 } // namespace milvus
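For reference, a brief sketch (not part of the commit; the header path is assumed, since this hunk does not name its file) of how the two FieldMeta constructors above are meant to be used — scalar fields carry only a name and type, while vector fields also carry a dimension and metric type:

#include "common/FieldMeta.h"  // assumed path; the FieldMeta header is not named in this diff

using namespace milvus;

void
FieldMetaUsageSketch() {
    // Scalar field: no VectorInfo, so get_dim() would trip Assert(is_vector()).
    FieldMeta age_meta("age", DataType::INT32);

    // Vector field: dim and metric type live in the optional VectorInfo member.
    FieldMeta vec_meta("fakevec", DataType::VECTOR_FLOAT, 16, MetricType::METRIC_L2);

    auto dim = vec_meta.get_dim();          // 16
    auto row_size = vec_meta.get_sizeof();  // field_sizeof(VECTOR_FLOAT, 16)
    (void)age_meta;
    (void)dim;
    (void)row_size;
}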
@ -11,35 +11,50 @@
|
||||
|
||||
#include "common/Schema.h"
|
||||
#include <google/protobuf/text_format.h>
|
||||
#include <boost/lexical_cast.hpp>
|
||||
|
||||
namespace milvus {
|
||||
|
||||
using std::string;
|
||||
static std::map<string, string>
|
||||
RepeatedKeyValToMap(const google::protobuf::RepeatedPtrField<proto::common::KeyValuePair>& kvs) {
|
||||
std::map<string, string> mapping;
|
||||
for (auto& kv : kvs) {
|
||||
AssertInfo(!mapping.count(kv.key()), "repeat key(" + kv.key() + ") in protobuf");
|
||||
mapping.emplace(kv.key(), kv.value());
|
||||
}
|
||||
return mapping;
|
||||
}
|
||||
|
||||
std::shared_ptr<Schema>
|
||||
Schema::ParseFrom(const milvus::proto::schema::CollectionSchema& schema_proto) {
|
||||
auto schema = std::make_shared<Schema>();
|
||||
schema->set_auto_id(schema_proto.autoid());
|
||||
for (const milvus::proto::schema::FieldSchema& child : schema_proto.fields()) {
|
||||
const auto& type_params = child.type_params();
|
||||
int64_t dim = -1;
|
||||
auto data_type = DataType(child.data_type());
|
||||
for (const auto& type_param : type_params) {
|
||||
if (type_param.key() == "dim") {
|
||||
dim = strtoll(type_param.value().c_str(), nullptr, 10);
|
||||
}
|
||||
}
|
||||
|
||||
if (field_is_vector(data_type)) {
|
||||
AssertInfo(dim != -1, "dim not found");
|
||||
} else {
|
||||
AssertInfo(dim == 1 || dim == -1, "Invalid dim field. Should be 1 or not exists");
|
||||
dim = 1;
|
||||
}
|
||||
|
||||
if (child.is_primary_key()) {
|
||||
AssertInfo(!schema->primary_key_offset_opt_.has_value(), "repetitive primary key");
|
||||
schema->primary_key_offset_opt_ = schema->size();
|
||||
}
|
||||
|
||||
schema->AddField(child.name(), data_type, dim);
|
||||
if (field_is_vector(data_type)) {
|
||||
auto type_map = RepeatedKeyValToMap(child.type_params());
|
||||
auto index_map = RepeatedKeyValToMap(child.index_params());
|
||||
if (!index_map.count("metric_type")) {
|
||||
auto default_metric_type =
|
||||
data_type == DataType::VECTOR_FLOAT ? MetricType::METRIC_L2 : MetricType::METRIC_Jaccard;
|
||||
index_map["metric_type"] = default_metric_type;
|
||||
}
|
||||
|
||||
AssertInfo(type_map.count("dim"), "dim not found");
|
||||
auto dim = boost::lexical_cast<int64_t>(type_map.at("dim"));
|
||||
AssertInfo(index_map.count("metric_type"), "index not found");
|
||||
auto metric_type = GetMetricType(index_map.at("metric_type"));
|
||||
schema->AddField(child.name(), data_type, dim, metric_type);
|
||||
} else {
|
||||
schema->AddField(child.name(), data_type);
|
||||
}
|
||||
}
|
||||
return schema;
|
||||
}
|
||||
|
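As a usage sketch for Schema::ParseFrom above (not part of the commit; the protobuf setter names are assumed to mirror the getters used in ParseFrom, e.g. set_name()/set_data_type()/set_autoid()):

#include "common/Schema.h"

using namespace milvus;

std::shared_ptr<Schema>
MakeBinaryVectorSchemaSketch() {
    proto::schema::CollectionSchema schema_proto;
    schema_proto.set_autoid(true);  // assumed setter for the autoid() field read by ParseFrom

    auto* field = schema_proto.add_fields();
    field->set_name("fakevec");
    field->set_data_type(proto::schema::DataType::VECTOR_BINARY);  // assumed enum spelling

    auto* dim_param = field->add_type_params();  // same pattern as TEST(Query, FillSegment) below
    dim_param->set_key("dim");
    dim_param->set_value("512");

    auto* metric_param = field->add_index_params();
    metric_param->set_key("metric_type");
    metric_param->set_value("Jaccard");

    // ParseFrom reads "dim" from type_params and "metric_type" from index_params;
    // for binary vectors a missing metric defaults to METRIC_Jaccard.
    return Schema::ParseFrom(schema_proto);
}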
@ -24,19 +24,15 @@ namespace milvus {
|
||||
class Schema {
|
||||
public:
|
||||
void
|
||||
AddField(std::string_view field_name, DataType data_type, int dim = 1) {
|
||||
auto field_meta = FieldMeta(field_name, data_type, dim);
|
||||
AddField(std::string_view field_name, DataType data_type) {
|
||||
auto field_meta = FieldMeta(field_name, data_type);
|
||||
this->AddField(std::move(field_meta));
|
||||
}
|
||||
|
||||
void
|
||||
AddField(FieldMeta field_meta) {
|
||||
auto offset = fields_.size();
|
||||
fields_.emplace_back(field_meta);
|
||||
offsets_.emplace(field_meta.get_name(), offset);
|
||||
auto field_sizeof = field_meta.get_sizeof();
|
||||
sizeof_infos_.push_back(field_sizeof);
|
||||
total_sizeof_ += field_sizeof;
|
||||
AddField(std::string_view field_name, DataType data_type, int64_t dim, MetricType metric_type) {
|
||||
auto field_meta = FieldMeta(field_name, data_type, dim, metric_type);
|
||||
this->AddField(std::move(field_meta));
|
||||
}
|
||||
|
||||
void
|
||||
@ -44,17 +40,6 @@ class Schema {
|
||||
is_auto_id_ = is_auto_id;
|
||||
}
|
||||
|
||||
auto
|
||||
begin() {
|
||||
return fields_.begin();
|
||||
}
|
||||
|
||||
auto
|
||||
end() {
|
||||
return fields_.end();
|
||||
}
|
||||
|
||||
public:
|
||||
bool
|
||||
get_is_auto_id() const {
|
||||
return is_auto_id_;
|
||||
@ -123,11 +108,20 @@ class Schema {
|
||||
static std::shared_ptr<Schema>
|
||||
ParseFrom(const milvus::proto::schema::CollectionSchema& schema_proto);
|
||||
|
||||
void
|
||||
AddField(FieldMeta&& field_meta) {
|
||||
auto offset = fields_.size();
|
||||
fields_.emplace_back(field_meta);
|
||||
offsets_.emplace(field_meta.get_name(), offset);
|
||||
auto field_sizeof = field_meta.get_sizeof();
|
||||
sizeof_infos_.push_back(std::move(field_sizeof));
|
||||
total_sizeof_ += field_sizeof;
|
||||
}
|
||||
|
||||
private:
|
||||
// this is where data holds
|
||||
std::vector<FieldMeta> fields_;
|
||||
|
||||
private:
|
||||
// a mapping for random access
|
||||
std::unordered_map<std::string, int> offsets_;
|
||||
std::vector<int> sizeof_infos_;
|
||||
|
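A short sketch (not from the commit) of how the two public AddField overloads above are used together with the offset/size bookkeeping; the by-name get_offset() lookup is assumed from its use later in this diff:

#include "common/Schema.h"

using namespace milvus;

void
SchemaAddFieldSketch() {
    auto schema = std::make_shared<Schema>();

    // Vector fields now require an explicit dimension and metric type ...
    schema->AddField("fakevec", DataType::VECTOR_FLOAT, 16, MetricType::METRIC_L2);
    // ... while scalar fields take neither.
    schema->AddField("age", DataType::INT32);

    // AddField(FieldMeta&&) records each field's offset and per-row size internally.
    auto offset_opt = schema->get_offset("fakevec");  // assumed to return an optional offset
    (void)offset_opt;
}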
internal/core/src/common/Types.cpp (new file, 45 lines)
@@ -0,0 +1,45 @@
+// Copyright (C) 2019-2020 Zilliz. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software distributed under the License
+// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+// or implied. See the License for the specific language governing permissions and limitations under the License
+
+//
+// Created by mike on 12/3/20.
+//
+#include "common/Types.h"
+#include <knowhere/index/vector_index/helpers/IndexParameter.h>
+#include "utils/EasyAssert.h"
+#include <boost/bimap.hpp>
+#include <boost/algorithm/string/case_conv.hpp>
+
+namespace milvus {
+
+using boost::algorithm::to_lower_copy;
+namespace Metric = knowhere::Metric;
+static auto map = [] {
+    boost::bimap<std::string, MetricType> mapping;
+    using pos = boost::bimap<std::string, MetricType>::value_type;
+    mapping.insert(pos(to_lower_copy(std::string(Metric::L2)), MetricType::METRIC_L2));
+    mapping.insert(pos(to_lower_copy(std::string(Metric::IP)), MetricType::METRIC_INNER_PRODUCT));
+    mapping.insert(pos(to_lower_copy(std::string(Metric::JACCARD)), MetricType::METRIC_Jaccard));
+    mapping.insert(pos(to_lower_copy(std::string(Metric::TANIMOTO)), MetricType::METRIC_Tanimoto));
+    mapping.insert(pos(to_lower_copy(std::string(Metric::HAMMING)), MetricType::METRIC_Hamming));
+    mapping.insert(pos(to_lower_copy(std::string(Metric::SUBSTRUCTURE)), MetricType::METRIC_Substructure));
+    mapping.insert(pos(to_lower_copy(std::string(Metric::SUPERSTRUCTURE)), MetricType::METRIC_Superstructure));
+    return mapping;
+}();
+
+MetricType
+GetMetricType(const std::string& type_name) {
+    auto real_name = to_lower_copy(type_name);
+    AssertInfo(map.left.count(real_name), "metric type not found: " + type_name);
+    return map.left.at(real_name);
+}
+
+} // namespace milvus
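A quick usage sketch (not part of the commit): because both the stored keys and the query string are lower-cased, the bimap lookup in GetMetricType is case-insensitive, and unknown names hit the AssertInfo failure path.

#include "common/Types.h"

using namespace milvus;

void
MetricTypeLookupSketch() {
    auto l2 = GetMetricType("L2");            // MetricType::METRIC_L2
    auto jaccard = GetMetricType("Jaccard");  // MetricType::METRIC_Jaccard
    auto ip = GetMetricType("ip");            // MetricType::METRIC_INNER_PRODUCT
    (void)l2;
    (void)jaccard;
    (void)ip;
    // GetMetricType("cosine") would fail AssertInfo with "metric type not found: cosine",
    // which is surfaced through ThrowWithTrace (see the EasyAssert changes later in this diff).
}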
internal/core/src/common/Types.h (new file, 27 lines)
@@ -0,0 +1,27 @@
+// Copyright (C) 2019-2020 Zilliz. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software distributed under the License
+// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+// or implied. See the License for the specific language governing permissions and limitations under the License
+
+#pragma once
+#include "utils/Types.h"
+#include <faiss/MetricType.h>
+#include <string>
+
+namespace milvus {
+using Timestamp = uint64_t;  // TODO: use TiKV-like timestamp
+using engine::DataType;
+using engine::FieldElementType;
+using engine::QueryResult;
+using MetricType = faiss::MetricType;
+
+faiss::MetricType
+GetMetricType(const std::string& type);
+
+} // namespace milvus
@ -11,20 +11,66 @@
|
||||
|
||||
#include "BruteForceSearch.h"
|
||||
#include <vector>
|
||||
#include <common/Types.h>
|
||||
#include <boost/dynamic_bitset.hpp>
|
||||
#include <queue>
|
||||
|
||||
namespace milvus::query {
|
||||
|
||||
void
|
||||
BinarySearchBruteForce(faiss::MetricType metric_type,
|
||||
int64_t code_size,
|
||||
const uint8_t* binary_chunk,
|
||||
int64_t chunk_size,
|
||||
int64_t topk,
|
||||
int64_t num_queries,
|
||||
const uint8_t* query_data,
|
||||
float* result_distances,
|
||||
idx_t* result_labels,
|
||||
faiss::ConcurrentBitsetPtr bitset) {
|
||||
BinarySearchBruteForceNaive(MetricType metric_type,
|
||||
int64_t code_size,
|
||||
const uint8_t* binary_chunk,
|
||||
int64_t chunk_size,
|
||||
int64_t topk,
|
||||
int64_t num_queries,
|
||||
const uint8_t* query_data,
|
||||
float* result_distances,
|
||||
idx_t* result_labels,
|
||||
faiss::ConcurrentBitsetPtr bitset) {
|
||||
// THIS IS A NAIVE IMPLEMENTATION, ready for optimize
|
||||
Assert(metric_type == faiss::METRIC_Jaccard);
|
||||
Assert(code_size % 4 == 0);
|
||||
|
||||
using T = std::tuple<float, int>;
|
||||
|
||||
for (int64_t q = 0; q < num_queries; ++q) {
|
||||
auto query_ptr = query_data + code_size * q;
|
||||
auto query = boost::dynamic_bitset(query_ptr, query_ptr + code_size);
|
||||
std::vector<T> max_heap(topk + 1, std::make_tuple(std::numeric_limits<float>::max(), -1));
|
||||
|
||||
for (int64_t i = 0; i < chunk_size; ++i) {
|
||||
auto element_ptr = binary_chunk + code_size * i;
|
||||
auto element = boost::dynamic_bitset(element_ptr, element_ptr + code_size);
|
||||
auto the_and = (query & element).count();
|
||||
auto the_or = (query | element).count();
|
||||
auto distance = the_or ? (float)(the_or - the_and) / the_or : 0;
|
||||
if (distance < std::get<0>(max_heap[0])) {
|
||||
max_heap[topk] = std::make_tuple(distance, i);
|
||||
std::push_heap(max_heap.begin(), max_heap.end());
|
||||
std::pop_heap(max_heap.begin(), max_heap.end());
|
||||
}
|
||||
}
|
||||
std::sort(max_heap.begin(), max_heap.end());
|
||||
for (int k = 0; k < topk; ++k) {
|
||||
auto info = max_heap[k];
|
||||
result_distances[k + q * topk] = std::get<0>(info);
|
||||
result_labels[k + q * topk] = std::get<1>(info);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
BinarySearchBruteForceFast(MetricType metric_type,
|
||||
int64_t code_size,
|
||||
const uint8_t* binary_chunk,
|
||||
int64_t chunk_size,
|
||||
int64_t topk,
|
||||
int64_t num_queries,
|
||||
const uint8_t* query_data,
|
||||
float* result_distances,
|
||||
idx_t* result_labels,
|
||||
faiss::ConcurrentBitsetPtr bitset) {
|
||||
const idx_t block_size = segcore::DefaultElementPerChunk;
|
||||
bool use_heap = true;
|
||||
|
||||
@ -83,6 +129,21 @@ BinarySearchBruteForce(faiss::MetricType metric_type,
|
||||
for (int i = 0; i < num_queries; ++i) {
|
||||
result_distances[i] = static_cast<float>(int_distances[i]);
|
||||
}
|
||||
} else {
|
||||
PanicInfo("Unsupported metric type");
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
BinarySearchBruteForce(const dataset::BinaryQueryDataset& query_dataset,
|
||||
const uint8_t* binary_chunk,
|
||||
int64_t chunk_size,
|
||||
float* result_distances,
|
||||
idx_t* result_labels,
|
||||
faiss::ConcurrentBitsetPtr bitset) {
|
||||
// TODO: refactor the internal function
|
||||
BinarySearchBruteForceFast(query_dataset.metric_type, query_dataset.code_size, binary_chunk, chunk_size,
|
||||
query_dataset.topk, query_dataset.num_queries, query_dataset.query_data,
|
||||
result_distances, result_labels, bitset);
|
||||
}
|
||||
} // namespace milvus::query
|
||||
|
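For intuition, the naive path above scores each (query, element) pair with a Jaccard distance computed from bitwise AND/OR popcounts; a standalone sketch of just that distance, using boost::dynamic_bitset the same way the implementation does, is:

#include <boost/dynamic_bitset.hpp>
#include <cstdint>

// Jaccard distance between two binary codes of code_size bytes:
// 1 - |a AND b| / |a OR b|, defined as 0 when both codes are all-zero
// (matching the `the_or ? ... : 0` guard in BinarySearchBruteForceNaive above).
inline float
JaccardDistanceSketch(const uint8_t* a, const uint8_t* b, int64_t code_size) {
    boost::dynamic_bitset<> lhs(a, a + code_size);
    boost::dynamic_bitset<> rhs(b, b + code_size);
    auto the_and = (lhs & rhs).count();
    auto the_or = (lhs | rhs).count();
    return the_or ? static_cast<float>(the_or - the_and) / the_or : 0.0f;
}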
@@ -15,15 +15,25 @@
 #include "common/Schema.h"
 
 namespace milvus::query {
+using MetricType = faiss::MetricType;
+
+namespace dataset {
+struct BinaryQueryDataset {
+    MetricType metric_type;
+    int64_t num_queries;
+    int64_t topk;
+    int64_t code_size;
+    const uint8_t* query_data;
+};
+
+} // namespace dataset
+
 void
-BinarySearchBruteForce(faiss::MetricType metric_type,
-                       int64_t code_size,
+BinarySearchBruteForce(const dataset::BinaryQueryDataset& query_dataset,
                        const uint8_t* binary_chunk,
                        int64_t chunk_size,
-                       int64_t topk,
-                       int64_t num_queries,
-                       const uint8_t* query_data,
                        float* result_distances,
                        idx_t* result_labels,
                        faiss::ConcurrentBitsetPtr bitset = nullptr);
 
 } // namespace milvus::query
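The reworked entry point bundles all query parameters into dataset::BinaryQueryDataset. A minimal call, mirroring the updated unit test later in this diff (idx_t is assumed to be a 64-bit label type as in faiss):

#include "query/BruteForceSearch.h"
#include <vector>

void
BinaryBruteForceCallSketch(const uint8_t* binary_chunk,  // chunk of packed binary codes
                           const uint8_t* query_data,    // packed query codes
                           int64_t chunk_size,
                           int64_t num_queries,
                           int64_t topk,
                           int64_t code_size) {          // bytes per code, i.e. dim / 8
    std::vector<float> distances(topk * num_queries);
    std::vector<int64_t> labels(topk * num_queries);  // assumes idx_t is int64_t-compatible

    milvus::query::dataset::BinaryQueryDataset query_dataset{
        faiss::MetricType::METRIC_Jaccard,  // metric_type
        num_queries,
        topk,
        code_size,
        query_data,
    };

    // bitset defaults to nullptr, i.e. no deletion filtering.
    milvus::query::BinarySearchBruteForce(query_dataset, binary_chunk, chunk_size,
                                          distances.data(), labels.data());
}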
@ -26,15 +26,25 @@ static std::unique_ptr<VectorPlanNode>
|
||||
ParseVecNode(Plan* plan, const Json& out_body) {
|
||||
Assert(out_body.is_object());
|
||||
// TODO add binary info
|
||||
auto vec_node = std::make_unique<FloatVectorANNS>();
|
||||
Assert(out_body.size() == 1);
|
||||
auto iter = out_body.begin();
|
||||
std::string field_name = iter.key();
|
||||
|
||||
auto& vec_info = iter.value();
|
||||
Assert(vec_info.is_object());
|
||||
auto topK = vec_info["topk"];
|
||||
AssertInfo(topK > 0, "topK must greater than 0");
|
||||
AssertInfo(topK < 16384, "topK is too large");
|
||||
auto field_meta = plan->schema_.operator[](field_name);
|
||||
|
||||
auto vec_node = [&]() -> std::unique_ptr<VectorPlanNode> {
|
||||
auto data_type = field_meta.get_data_type();
|
||||
if (data_type == DataType::VECTOR_FLOAT) {
|
||||
return std::make_unique<FloatVectorANNS>();
|
||||
} else {
|
||||
return std::make_unique<BinaryVectorANNS>();
|
||||
}
|
||||
}();
|
||||
vec_node->query_info_.topK_ = topK;
|
||||
vec_node->query_info_.metric_type_ = vec_info.at("metric_type");
|
||||
vec_node->query_info_.search_params_ = vec_info.at("params");
|
||||
|
@ -16,6 +16,7 @@
|
||||
|
||||
#include <faiss/utils/distances.h>
|
||||
#include "utils/tools.h"
|
||||
#include "query/BruteForceSearch.h"
|
||||
|
||||
namespace milvus::query {
|
||||
using segcore::DefaultElementPerChunk;
|
||||
@ -41,7 +42,7 @@ QueryBruteForceImpl(const segcore::SegmentSmallIndex& segment,
|
||||
int64_t num_queries,
|
||||
Timestamp timestamp,
|
||||
std::optional<const BitmapSimple*> bitmaps_opt,
|
||||
segcore::QueryResult& results) {
|
||||
QueryResult& results) {
|
||||
auto& schema = segment.get_schema();
|
||||
auto& indexing_record = segment.get_indexing_record();
|
||||
auto& record = segment.get_insert_record();
|
||||
@ -131,7 +132,92 @@ QueryBruteForceImpl(const segcore::SegmentSmallIndex& segment,
|
||||
}
|
||||
results.result_ids_ = std::move(final_uids);
|
||||
// TODO: deprecated code end
|
||||
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
Status
|
||||
BinaryQueryBruteForceImpl(const segcore::SegmentSmallIndex& segment,
|
||||
const query::QueryInfo& info,
|
||||
const uint8_t* query_data,
|
||||
int64_t num_queries,
|
||||
Timestamp timestamp,
|
||||
std::optional<const BitmapSimple*> bitmaps_opt,
|
||||
QueryResult& results) {
|
||||
auto& schema = segment.get_schema();
|
||||
auto& indexing_record = segment.get_indexing_record();
|
||||
auto& record = segment.get_insert_record();
|
||||
// step 1: binary search to find the barrier of the snapshot
|
||||
auto ins_barrier = get_barrier(record, timestamp);
|
||||
auto max_chunk = upper_div(ins_barrier, DefaultElementPerChunk);
|
||||
auto metric_type = GetMetricType(info.metric_type_);
|
||||
// auto del_barrier = get_barrier(deleted_record_, timestamp);
|
||||
|
||||
#if 0
|
||||
auto bitmap_holder = get_deleted_bitmap(del_barrier, timestamp, ins_barrier);
|
||||
Assert(bitmap_holder);
|
||||
auto bitmap = bitmap_holder->bitmap_ptr;
|
||||
#endif
|
||||
|
||||
// step 2.1: get meta
|
||||
// step 2.2: get which vector field to search
|
||||
auto vecfield_offset_opt = schema.get_offset(info.field_id_);
|
||||
Assert(vecfield_offset_opt.has_value());
|
||||
auto vecfield_offset = vecfield_offset_opt.value();
|
||||
auto& field = schema[vecfield_offset];
|
||||
|
||||
Assert(field.get_data_type() == DataType::VECTOR_BINARY);
|
||||
auto dim = field.get_dim();
|
||||
auto code_size = dim / 8;
|
||||
auto topK = info.topK_;
|
||||
auto total_count = topK * num_queries;
|
||||
|
||||
// step 3: small indexing search
|
||||
std::vector<int64_t> final_uids(total_count, -1);
|
||||
std::vector<float> final_dis(total_count, std::numeric_limits<float>::max());
|
||||
query::dataset::BinaryQueryDataset query_dataset{metric_type, num_queries, topK, code_size, query_data};
|
||||
|
||||
using segcore::BinaryVector;
|
||||
auto vec_ptr = record.get_entity<BinaryVector>(vecfield_offset);
|
||||
|
||||
auto max_indexed_id = 0;
|
||||
// step 4: brute force search where small indexing is unavailable
|
||||
for (int chunk_id = max_indexed_id; chunk_id < max_chunk; ++chunk_id) {
|
||||
std::vector<int64_t> buf_uids(total_count, -1);
|
||||
std::vector<float> buf_dis(total_count, std::numeric_limits<float>::max());
|
||||
|
||||
auto& chunk = vec_ptr->get_chunk(chunk_id);
|
||||
auto nsize =
|
||||
chunk_id != max_chunk - 1 ? DefaultElementPerChunk : ins_barrier - chunk_id * DefaultElementPerChunk;
|
||||
|
||||
auto bitmap_view = create_bitmap_view(bitmaps_opt, chunk_id);
|
||||
BinarySearchBruteForce(query_dataset, chunk.data(), nsize, buf_dis.data(), buf_uids.data(), bitmap_view);
|
||||
|
||||
// convert chunk uid to segment uid
|
||||
for (auto& x : buf_uids) {
|
||||
if (x != -1) {
|
||||
x += chunk_id * DefaultElementPerChunk;
|
||||
}
|
||||
}
|
||||
|
||||
segcore::merge_into(num_queries, topK, final_dis.data(), final_uids.data(), buf_dis.data(), buf_uids.data());
|
||||
}
|
||||
|
||||
results.result_distances_ = std::move(final_dis);
|
||||
results.internal_seg_offsets_ = std::move(final_uids);
|
||||
results.topK_ = topK;
|
||||
results.num_queries_ = num_queries;
|
||||
|
||||
// TODO: deprecated code begin
|
||||
final_uids = results.internal_seg_offsets_;
|
||||
for (auto& id : final_uids) {
|
||||
if (id == -1) {
|
||||
continue;
|
||||
}
|
||||
id = record.uids_[id];
|
||||
}
|
||||
results.result_ids_ = std::move(final_uids);
|
||||
// TODO: deprecated code end
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
} // namespace milvus::query
|
||||
|
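BinaryQueryBruteForceImpl above relies on segcore::merge_into to fold each chunk's partial top-k into the running result; a conceptual sketch of what such a per-query merge needs to do (assumed semantics — the real helper lives in segcore and is not shown in this diff):

#include <algorithm>
#include <cstdint>
#include <vector>

// Sketch only: merge two per-query top-k lists, both sorted by ascending
// distance, keeping the best `topk` entries per query in (final_dis, final_uids).
inline void
MergeTopKSketch(int64_t num_queries, int64_t topk,
                float* final_dis, int64_t* final_uids,
                const float* buf_dis, const int64_t* buf_uids) {
    std::vector<float> dis(topk);
    std::vector<int64_t> uids(topk);
    for (int64_t q = 0; q < num_queries; ++q) {
        auto* fd = final_dis + q * topk;
        auto* fu = final_uids + q * topk;
        const auto* bd = buf_dis + q * topk;
        const auto* bu = buf_uids + q * topk;
        int64_t i = 0;
        int64_t j = 0;
        for (int64_t k = 0; k < topk; ++k) {
            if (bd[j] < fd[i]) {
                dis[k] = bd[j];
                uids[k] = bu[j];
                ++j;
            } else {
                dis[k] = fd[i];
                uids[k] = fu[i];
                ++i;
            }
        }
        std::copy(dis.begin(), dis.end(), fd);
        std::copy(uids.begin(), uids.end(), fu);
    }
}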
@ -27,5 +27,14 @@ QueryBruteForceImpl(const segcore::SegmentSmallIndex& segment,
|
||||
int64_t num_queries,
|
||||
Timestamp timestamp,
|
||||
std::optional<const BitmapSimple*> bitmap_opt,
|
||||
segcore::QueryResult& results);
|
||||
QueryResult& results);
|
||||
|
||||
Status
|
||||
BinaryQueryBruteForceImpl(const segcore::SegmentSmallIndex& segment,
|
||||
const query::QueryInfo& info,
|
||||
const uint8_t* query_data,
|
||||
int64_t num_queries,
|
||||
Timestamp timestamp,
|
||||
std::optional<const BitmapSimple*> bitmaps_opt,
|
||||
QueryResult& results);
|
||||
} // namespace milvus::query
|
||||
|
@ -18,7 +18,7 @@
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
|
||||
#include "utils/Types.h"
|
||||
#include "common/Types.h"
|
||||
#include "utils/Json.h"
|
||||
|
||||
namespace milvus {
|
||||
|
@ -28,7 +28,7 @@ class ExecPlanNodeVisitor : PlanNodeVisitor {
|
||||
visit(BinaryVectorANNS& node) override;
|
||||
|
||||
public:
|
||||
using RetType = segcore::QueryResult;
|
||||
using RetType = QueryResult;
|
||||
ExecPlanNodeVisitor(segcore::SegmentBase& segment, Timestamp timestamp, const PlaceholderGroup& placeholder_group)
|
||||
: segment_(segment), timestamp_(timestamp), placeholder_group_(placeholder_group) {
|
||||
}
|
||||
|
@ -26,7 +26,7 @@ namespace impl {
|
||||
// WILL BE USED BY GENERATOR UNDER suvlim/core_gen/
|
||||
class ExecPlanNodeVisitor : PlanNodeVisitor {
|
||||
public:
|
||||
using RetType = segcore::QueryResult;
|
||||
using RetType = QueryResult;
|
||||
ExecPlanNodeVisitor(segcore::SegmentBase& segment, Timestamp timestamp, const PlaceholderGroup& placeholder_group)
|
||||
: segment_(segment), timestamp_(timestamp), placeholder_group_(placeholder_group) {
|
||||
}
|
||||
@ -75,7 +75,22 @@ ExecPlanNodeVisitor::visit(FloatVectorANNS& node) {
|
||||
|
||||
void
|
||||
ExecPlanNodeVisitor::visit(BinaryVectorANNS& node) {
|
||||
// TODO
|
||||
// TODO: optimize here, remove the dynamic cast
|
||||
assert(!ret_.has_value());
|
||||
auto segment = dynamic_cast<segcore::SegmentSmallIndex*>(&segment_);
|
||||
AssertInfo(segment, "support SegmentSmallIndex Only");
|
||||
RetType ret;
|
||||
auto& ph = placeholder_group_.at(0);
|
||||
auto src_data = ph.get_blob<uint8_t>();
|
||||
auto num_queries = ph.num_of_queries_;
|
||||
if (node.predicate_.has_value()) {
|
||||
auto bitmap = ExecExprVisitor(*segment).call_child(*node.predicate_.value());
|
||||
auto ptr = &bitmap;
|
||||
BinaryQueryBruteForceImpl(*segment, node.query_info_, src_data, num_queries, timestamp_, ptr, ret);
|
||||
} else {
|
||||
BinaryQueryBruteForceImpl(*segment, node.query_info_, src_data, num_queries, timestamp_, std::nullopt, ret);
|
||||
}
|
||||
ret_ = ret;
|
||||
}
|
||||
|
||||
} // namespace milvus::query
|
||||
|
@ -73,7 +73,24 @@ ShowPlanNodeVisitor::visit(FloatVectorANNS& node) {
|
||||
|
||||
void
|
||||
ShowPlanNodeVisitor::visit(BinaryVectorANNS& node) {
|
||||
// TODO
|
||||
assert(!ret_);
|
||||
auto& info = node.query_info_;
|
||||
Json json_body{
|
||||
{"node_type", "BinaryVectorANNS"}, //
|
||||
{"metric_type", info.metric_type_}, //
|
||||
{"field_id_", info.field_id_}, //
|
||||
{"topK", info.topK_}, //
|
||||
{"search_params", info.search_params_}, //
|
||||
{"placeholder_tag", node.placeholder_tag_}, //
|
||||
};
|
||||
if (node.predicate_.has_value()) {
|
||||
ShowExprVisitor expr_show;
|
||||
Assert(node.predicate_.value());
|
||||
json_body["predicate"] = expr_show.call_child(node.predicate_->operator*());
|
||||
} else {
|
||||
json_body["predicate"] = "None";
|
||||
}
|
||||
ret_ = json_body;
|
||||
}
|
||||
|
||||
} // namespace milvus::query
|
||||
|
@ -123,9 +123,10 @@ Collection::CreateIndex(std::string& index_config) {
|
||||
void
|
||||
Collection::parse() {
|
||||
if (collection_proto_.empty()) {
|
||||
// TODO: remove hard code use unittests are ready
|
||||
std::cout << "WARN: Use default schema" << std::endl;
|
||||
auto schema = std::make_shared<Schema>();
|
||||
schema->AddField("fakevec", DataType::VECTOR_FLOAT, 16);
|
||||
schema->AddField("fakevec", DataType::VECTOR_FLOAT, 16, MetricType::METRIC_L2);
|
||||
schema->AddField("age", DataType::INT32);
|
||||
schema_ = schema;
|
||||
return;
|
||||
|
@ -226,8 +226,14 @@ class ConcurrentVector : public ConcurrentVectorImpl<Type, true> {
|
||||
using ConcurrentVectorImpl<Type, true>::ConcurrentVectorImpl;
|
||||
};
|
||||
|
||||
class FloatVector {};
|
||||
class BinaryVector {};
|
||||
class VectorTrait {};
|
||||
|
||||
class FloatVector : public VectorTrait {
|
||||
using embedded_type = float;
|
||||
};
|
||||
class BinaryVector : public VectorTrait {
|
||||
using embedded_type = uint8_t;
|
||||
};
|
||||
|
||||
template <>
|
||||
class ConcurrentVector<FloatVector> : public ConcurrentVectorImpl<float, false> {
|
||||
|
@ -85,8 +85,6 @@ IndexingRecord::UpdateResourceAck(int64_t chunk_ack, const InsertRecord& record)
|
||||
template <typename T>
|
||||
void
|
||||
ScalarIndexingEntry<T>::BuildIndexRange(int64_t ack_beg, int64_t ack_end, const VectorBase* vec_base) {
|
||||
auto dim = field_meta_.get_dim();
|
||||
|
||||
auto source = dynamic_cast<const ConcurrentVector<T>*>(vec_base);
|
||||
Assert(source);
|
||||
auto chunk_size = source->chunk_size();
|
||||
|
@ -24,7 +24,7 @@ namespace milvus {
|
||||
namespace segcore {
|
||||
// using engine::DataChunk;
|
||||
// using engine::DataChunkPtr;
|
||||
using engine::QueryResult;
|
||||
using QueryResult = milvus::QueryResult;
|
||||
struct RowBasedRawData {
|
||||
void* raw_data; // schema
|
||||
int sizeof_per_row; // alignment
|
||||
|
@ -42,7 +42,7 @@ DeleteSegment(CSegmentBase segment) {
|
||||
|
||||
void
|
||||
DeleteQueryResult(CQueryResult query_result) {
|
||||
auto res = (milvus::segcore::QueryResult*)query_result;
|
||||
auto res = (milvus::QueryResult*)query_result;
|
||||
delete res;
|
||||
}
|
||||
|
||||
@ -134,7 +134,7 @@ Search(CSegmentBase c_segment,
|
||||
placeholder_groups.push_back((const milvus::query::PlaceholderGroup*)c_placeholder_groups[i]);
|
||||
}
|
||||
|
||||
auto query_result = std::make_unique<milvus::segcore::QueryResult>();
|
||||
auto query_result = std::make_unique<milvus::QueryResult>();
|
||||
|
||||
auto status = CStatus();
|
||||
try {
|
||||
|
@@ -42,8 +42,11 @@ EasyAssertInfo(
 
 [[noreturn]] void
 ThrowWithTrace(const std::exception& exception) {
+    if (typeid(exception) == typeid(WrappedRuntimError)) {
+        throw exception;
+    }
     auto err_msg = exception.what() + std::string("\n") + EasyStackTrace();
-    throw std::runtime_error(err_msg);
+    throw WrappedRuntimError(err_msg);
 }
 
 } // namespace milvus::impl
@@ -11,6 +11,7 @@
 
 #pragma once
 #include <string_view>
+#include <stdexcept>
 #include <exception>
 #include <stdio.h>
 #include <stdlib.h>
@@ -22,6 +23,10 @@ void
 EasyAssertInfo(
     bool value, std::string_view expr_str, std::string_view filename, int lineno, std::string_view extra_info);
 
+class WrappedRuntimError : public std::runtime_error {
+    using std::runtime_error::runtime_error;
+};
+
 [[noreturn]] void
 ThrowWithTrace(const std::exception& exception);
 
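A short usage sketch (not from the commit) of the new wrapper type: the first ThrowWithTrace call wraps the message plus EasyStackTrace() in WrappedRuntimError, and an already-wrapped exception is rethrown as-is instead of accumulating traces. The milvus::impl namespace is assumed from EasyAssert.cpp above.

#include "utils/EasyAssert.h"
#include <iostream>
#include <stdexcept>

void
ThrowWithTraceSketch() {
    try {
        // First wrap: message + stack trace captured into WrappedRuntimError.
        milvus::impl::ThrowWithTrace(std::runtime_error("bad argument"));
    } catch (const std::exception& e) {
        std::cout << e.what() << std::endl;  // original message followed by the trace
    }
}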
internal/core/thirdparty/CMakeLists.txt (vendored)
@@ -26,6 +26,12 @@ include( FetchContent )
 set( FETCHCONTENT_BASE_DIR ${MILVUS_BINARY_DIR}/3rdparty_download )
 set( FETCHCONTENT_QUIET OFF )
 
+if( CUSTOM_THIRDPARTY_DOWNLOAD_PATH )
+    set( THIRDPARTY_DOWNLOAD_PATH ${CUSTOM_THIRDPARTY_DOWNLOAD_PATH} )
+else()
+    set( THIRDPARTY_DOWNLOAD_PATH ${CMAKE_BINARY_DIR}/3rdparty_download/download )
+endif()
+message( STATUS "Thirdparty downloaded file path: ${THIRDPARTY_DOWNLOAD_PATH}" )
 # ----------------------------------------------------------------------
 # Find pthreads
 
@@ -24,7 +24,7 @@ FetchContent_Declare(
     opentracing
     URL ${OPENTRACING_SOURCE_URL}
     URL_MD5 "e598ba4b81ae8e1ceed8cd8bbf86f2fd"
-    DOWNLOAD_DIR ${MILVUS_BINARY_DIR}/3rdparty_download/download
+    DOWNLOAD_DIR ${THIRDPARTY_DOWNLOAD_PATH}
     SOURCE_DIR ${CMAKE_CURRENT_BINARY_DIR}/opentracing-src
     BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}/opentracing-build
     )
@@ -25,7 +25,7 @@ FetchContent_Declare(
     protobuf
     URL ${GTEST_SOURCE_URL}
     URL_MD5 "9562b27cc6ac5ebd087f201f1310c885"
-    DOWNLOAD_DIR ${MILVUS_BINARY_DIR}/3rdparty_download/download
+    DOWNLOAD_DIR ${THIRDPARTY_DOWNLOAD_PATH}
     SOURCE_DIR ${CMAKE_CURRENT_BINARY_DIR}/protobuf-src
     BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}/protobuf-build
 
|
@@ -23,7 +23,7 @@ FetchContent_Declare(
     yaml-cpp
    URL ${YAMLCPP_SOURCE_URL}
     URL_MD5 "b45bf1089a382e81f6b661062c10d0c2"
-    DOWNLOAD_DIR ${MILVUS_BINARY_DIR}/3rdparty_download/download
+    DOWNLOAD_DIR ${THIRDPARTY_DOWNLOAD_PATH}
     SOURCE_DIR ${CMAKE_CURRENT_BINARY_DIR}/yaml-src
     BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}/yaml-build
     )
@ -21,7 +21,7 @@ TEST(Binary, Insert) {
|
||||
int64_t num_queries = 10;
|
||||
int64_t topK = 5;
|
||||
auto schema = std::make_shared<Schema>();
|
||||
schema->AddField("vecbin", DataType::VECTOR_BINARY, 128);
|
||||
schema->AddField("vecbin", DataType::VECTOR_BINARY, 128, MetricType::METRIC_Jaccard);
|
||||
schema->AddField("age", DataType::INT64);
|
||||
auto dataset = DataGen(schema, N, 10);
|
||||
auto segment = CreateSegment(schema);
|
||||
|
@ -98,7 +98,49 @@ TEST(Expr, Range) {
|
||||
}
|
||||
})";
|
||||
auto schema = std::make_shared<Schema>();
|
||||
schema->AddField("fakevec", DataType::VECTOR_FLOAT, 16);
|
||||
schema->AddField("fakevec", DataType::VECTOR_FLOAT, 16, MetricType::METRIC_L2);
|
||||
schema->AddField("age", DataType::INT32);
|
||||
auto plan = CreatePlan(*schema, dsl_string);
|
||||
ShowPlanNodeVisitor shower;
|
||||
Assert(plan->tag2field_.at("$0") == "fakevec");
|
||||
auto out = shower.call_child(*plan->plan_node_);
|
||||
std::cout << out.dump(4);
|
||||
}
|
||||
|
||||
TEST(Expr, RangeBinary) {
|
||||
SUCCEED();
|
||||
using namespace milvus;
|
||||
using namespace milvus::query;
|
||||
using namespace milvus::segcore;
|
||||
std::string dsl_string = R"(
|
||||
{
|
||||
"bool": {
|
||||
"must": [
|
||||
{
|
||||
"range": {
|
||||
"age": {
|
||||
"GT": 1,
|
||||
"LT": 100
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"vector": {
|
||||
"fakevec": {
|
||||
"metric_type": "Jaccard",
|
||||
"params": {
|
||||
"nprobe": 10
|
||||
},
|
||||
"query": "$0",
|
||||
"topk": 10
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
})";
|
||||
auto schema = std::make_shared<Schema>();
|
||||
schema->AddField("fakevec", DataType::VECTOR_BINARY, 512, MetricType::METRIC_Jaccard);
|
||||
schema->AddField("age", DataType::INT32);
|
||||
auto plan = CreatePlan(*schema, dsl_string);
|
||||
ShowPlanNodeVisitor shower;
|
||||
@ -140,7 +182,7 @@ TEST(Expr, InvalidRange) {
|
||||
}
|
||||
})";
|
||||
auto schema = std::make_shared<Schema>();
|
||||
schema->AddField("fakevec", DataType::VECTOR_FLOAT, 16);
|
||||
schema->AddField("fakevec", DataType::VECTOR_FLOAT, 16, MetricType::METRIC_L2);
|
||||
schema->AddField("age", DataType::INT32);
|
||||
ASSERT_ANY_THROW(CreatePlan(*schema, dsl_string));
|
||||
}
|
||||
@ -179,7 +221,7 @@ TEST(Expr, InvalidDSL) {
|
||||
})";
|
||||
|
||||
auto schema = std::make_shared<Schema>();
|
||||
schema->AddField("fakevec", DataType::VECTOR_FLOAT, 16);
|
||||
schema->AddField("fakevec", DataType::VECTOR_FLOAT, 16, MetricType::METRIC_L2);
|
||||
schema->AddField("age", DataType::INT32);
|
||||
ASSERT_ANY_THROW(CreatePlan(*schema, dsl_string));
|
||||
}
|
||||
@ -189,7 +231,7 @@ TEST(Expr, ShowExecutor) {
|
||||
using namespace milvus::segcore;
|
||||
auto node = std::make_unique<FloatVectorANNS>();
|
||||
auto schema = std::make_shared<Schema>();
|
||||
schema->AddField("fakevec", DataType::VECTOR_FLOAT, 16);
|
||||
schema->AddField("fakevec", DataType::VECTOR_FLOAT, 16, MetricType::METRIC_L2);
|
||||
int64_t num_queries = 100L;
|
||||
auto raw_data = DataGen(schema, num_queries);
|
||||
auto& info = node->query_info_;
|
||||
@ -248,7 +290,7 @@ TEST(Expr, TestRange) {
|
||||
}
|
||||
})";
|
||||
auto schema = std::make_shared<Schema>();
|
||||
schema->AddField("fakevec", DataType::VECTOR_FLOAT, 16);
|
||||
schema->AddField("fakevec", DataType::VECTOR_FLOAT, 16, MetricType::METRIC_L2);
|
||||
schema->AddField("age", DataType::INT32);
|
||||
|
||||
auto seg = CreateSegment(schema);
|
||||
|
@ -235,14 +235,14 @@ TEST(Indexing, IVFFlatNM) {
|
||||
}
|
||||
}
|
||||
|
||||
TEST(Indexing, DISABLED_BinaryBruteForce) {
|
||||
TEST(Indexing, BinaryBruteForce) {
|
||||
int64_t N = 100000;
|
||||
int64_t num_queries = 10;
|
||||
int64_t topk = 5;
|
||||
int64_t dim = 64;
|
||||
int64_t dim = 512;
|
||||
auto result_count = topk * num_queries;
|
||||
auto schema = std::make_shared<Schema>();
|
||||
schema->AddField("vecbin", DataType::VECTOR_BINARY, dim);
|
||||
schema->AddField("vecbin", DataType::VECTOR_BINARY, dim, MetricType::METRIC_Jaccard);
|
||||
schema->AddField("age", DataType::INT64);
|
||||
auto dataset = DataGen(schema, N, 10);
|
||||
vector<float> distances(result_count);
|
||||
@ -250,8 +250,16 @@ TEST(Indexing, DISABLED_BinaryBruteForce) {
|
||||
auto bin_vec = dataset.get_col<uint8_t>(0);
|
||||
auto line_sizeof = schema->operator[](0).get_sizeof();
|
||||
auto query_data = 1024 * line_sizeof + bin_vec.data();
|
||||
query::BinarySearchBruteForce(faiss::MetricType::METRIC_Jaccard, line_sizeof, bin_vec.data(), N, topk, num_queries,
|
||||
query_data, distances.data(), ids.data());
|
||||
query::dataset::BinaryQueryDataset query_dataset{
|
||||
faiss::MetricType::METRIC_Jaccard, //
|
||||
num_queries, //
|
||||
topk, //
|
||||
line_sizeof, //
|
||||
query_data //
|
||||
};
|
||||
|
||||
query::BinarySearchBruteForce(query_dataset, bin_vec.data(), N, distances.data(), ids.data());
|
||||
|
||||
QueryResult qr;
|
||||
qr.num_queries_ = num_queries;
|
||||
qr.topK_ = topk;
|
||||
@ -264,76 +272,78 @@ TEST(Indexing, DISABLED_BinaryBruteForce) {
|
||||
[
|
||||
[
|
||||
"1024->0.000000",
|
||||
"86966->0.395349",
|
||||
"24843->0.404762",
|
||||
"13806->0.416667",
|
||||
"44313->0.421053"
|
||||
"43190->0.578804",
|
||||
"5255->0.586207",
|
||||
"23247->0.586486",
|
||||
"4936->0.588889"
|
||||
],
|
||||
[
|
||||
"1025->0.000000",
|
||||
"14226->0.348837",
|
||||
"1488->0.365854",
|
||||
"47337->0.377778",
|
||||
"20913->0.377778"
|
||||
"15147->0.562162",
|
||||
"49910->0.564304",
|
||||
"67435->0.567867",
|
||||
"38292->0.569921"
|
||||
],
|
||||
[
|
||||
"1026->0.000000",
|
||||
"81882->0.386364",
|
||||
"9215->0.409091",
|
||||
"95024->0.409091",
|
||||
"54987->0.414634"
|
||||
"15332->0.569061",
|
||||
"56391->0.572559",
|
||||
"17187->0.572603",
|
||||
"26988->0.573771"
|
||||
],
|
||||
[
|
||||
"1027->0.000000",
|
||||
"68981->0.394737",
|
||||
"75528->0.404762",
|
||||
"68794->0.405405",
|
||||
"21975->0.425000"
|
||||
"4502->0.559585",
|
||||
"25879->0.566234",
|
||||
"66937->0.566489",
|
||||
"21228->0.566845"
|
||||
],
|
||||
[
|
||||
"1028->0.000000",
|
||||
"90290->0.375000",
|
||||
"34309->0.394737",
|
||||
"58559->0.400000",
|
||||
"33865->0.400000"
|
||||
"38490->0.578804",
|
||||
"12946->0.581717",
|
||||
"31677->0.582173",
|
||||
"94474->0.583569"
|
||||
],
|
||||
[
|
||||
"1029->0.000000",
|
||||
"62722->0.388889",
|
||||
"89070->0.394737",
|
||||
"18528->0.414634",
|
||||
"94971->0.421053"
|
||||
"59011->0.551630",
|
||||
"82575->0.555263",
|
||||
"42914->0.561828",
|
||||
"23705->0.564171"
|
||||
],
|
||||
[
|
||||
"1030->0.000000",
|
||||
"67402->0.333333",
|
||||
"3988->0.347826",
|
||||
"86376->0.354167",
|
||||
"84381->0.361702"
|
||||
"39782->0.579946",
|
||||
"65553->0.589947",
|
||||
"82154->0.590028",
|
||||
"13374->0.590164"
|
||||
],
|
||||
[
|
||||
"1031->0.000000",
|
||||
"81569->0.325581",
|
||||
"12715->0.347826",
|
||||
"40332->0.363636",
|
||||
"21037->0.372093"
|
||||
"47826->0.582873",
|
||||
"72669->0.587432",
|
||||
"334->0.588076",
|
||||
"80652->0.589333"
|
||||
],
|
||||
[
|
||||
"1032->0.000000",
|
||||
"60536->0.428571",
|
||||
"93293->0.432432",
|
||||
"70969->0.435897",
|
||||
"64048->0.450000"
|
||||
"31968->0.573034",
|
||||
"63545->0.575758",
|
||||
"76913->0.575916",
|
||||
"6286->0.576000"
|
||||
],
|
||||
[
|
||||
"1033->0.000000",
|
||||
"99022->0.394737",
|
||||
"11763->0.405405",
|
||||
"50073->0.428571",
|
||||
"97118->0.428571"
|
||||
"95635->0.570248",
|
||||
"93439->0.574866",
|
||||
"6709->0.578534",
|
||||
"6367->0.579634"
|
||||
]
|
||||
]
|
||||
]
|
||||
)");
|
||||
ASSERT_EQ(json, ref);
|
||||
auto json_str = json.dump(2);
|
||||
auto ref_str = ref.dump(2);
|
||||
ASSERT_EQ(json_str, ref_str);
|
||||
}
|
||||
|
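For context, the reference arrays above are the "<id>-><distance>" encoding of a QueryResult; a hedged sketch of a QueryResultToJson-style helper (the real one comes from the test utilities and is not shown in this diff; Json is assumed to be an nlohmann-style alias as in utils/Json.h):

#include <cstdint>
#include <string>
#include <vector>
#include <nlohmann/json.hpp>  // assumed backing library for the Json alias

// Sketch only: flatten a (num_queries x topk) result into nested arrays of
// "<id>-><distance>" strings, matching the reference JSON in the test above.
inline nlohmann::json
QueryResultToJsonSketch(const std::vector<int64_t>& ids,
                        const std::vector<float>& distances,
                        int64_t num_queries, int64_t topk) {
    std::vector<std::vector<std::string>> results;
    for (int64_t q = 0; q < num_queries; ++q) {
        std::vector<std::string> row;
        for (int64_t k = 0; k < topk; ++k) {
            auto index = q * topk + k;
            row.emplace_back(std::to_string(ids[index]) + "->" + std::to_string(distances[index]));
        }
        results.emplace_back(std::move(row));
    }
    return nlohmann::json{results};  // brace-init adds the outer array seen in the reference
}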
@ -72,7 +72,7 @@ TEST(Query, ShowExecutor) {
|
||||
using namespace milvus;
|
||||
auto node = std::make_unique<FloatVectorANNS>();
|
||||
auto schema = std::make_shared<Schema>();
|
||||
schema->AddField("fakevec", DataType::VECTOR_FLOAT, 16);
|
||||
schema->AddField("fakevec", DataType::VECTOR_FLOAT, 16, MetricType::METRIC_L2);
|
||||
int64_t num_queries = 100L;
|
||||
auto raw_data = DataGen(schema, num_queries);
|
||||
auto& info = node->query_info_;
|
||||
@ -98,7 +98,7 @@ TEST(Query, DSL) {
|
||||
"must": [
|
||||
{
|
||||
"vector": {
|
||||
"Vec": {
|
||||
"fakevec": {
|
||||
"metric_type": "L2",
|
||||
"params": {
|
||||
"nprobe": 10
|
||||
@ -113,7 +113,7 @@ TEST(Query, DSL) {
|
||||
})";
|
||||
|
||||
auto schema = std::make_shared<Schema>();
|
||||
schema->AddField("fakevec", DataType::VECTOR_FLOAT, 16);
|
||||
schema->AddField("fakevec", DataType::VECTOR_FLOAT, 16, MetricType::METRIC_L2);
|
||||
|
||||
auto plan = CreatePlan(*schema, dsl_string);
|
||||
auto res = shower.call_child(*plan->plan_node_);
|
||||
@ -123,7 +123,7 @@ TEST(Query, DSL) {
|
||||
{
|
||||
"bool": {
|
||||
"vector": {
|
||||
"Vec": {
|
||||
"fakevec": {
|
||||
"metric_type": "L2",
|
||||
"params": {
|
||||
"nprobe": 10
|
||||
@ -159,7 +159,7 @@ TEST(Query, ParsePlaceholderGroup) {
|
||||
})";
|
||||
|
||||
auto schema = std::make_shared<Schema>();
|
||||
schema->AddField("fakevec", DataType::VECTOR_FLOAT, 16);
|
||||
schema->AddField("fakevec", DataType::VECTOR_FLOAT, 16, MetricType::METRIC_L2);
|
||||
auto plan = CreatePlan(*schema, dsl_string);
|
||||
int64_t num_queries = 100000;
|
||||
int dim = 16;
|
||||
@ -172,7 +172,7 @@ TEST(Query, ExecWithPredicate) {
|
||||
using namespace milvus::query;
|
||||
using namespace milvus::segcore;
|
||||
auto schema = std::make_shared<Schema>();
|
||||
schema->AddField("fakevec", DataType::VECTOR_FLOAT, 16);
|
||||
schema->AddField("fakevec", DataType::VECTOR_FLOAT, 16, MetricType::METRIC_L2);
|
||||
schema->AddField("age", DataType::FLOAT);
|
||||
std::string dsl = R"({
|
||||
"bool": {
|
||||
@ -217,8 +217,8 @@ TEST(Query, ExecWithPredicate) {
|
||||
int topk = 5;
|
||||
|
||||
Json json = QueryResultToJson(qr);
|
||||
|
||||
auto ref = Json::parse(R"([
|
||||
auto ref = Json::parse(R"(
|
||||
[
|
||||
[
|
||||
[
|
||||
"980486->3.149221",
|
||||
@ -257,15 +257,14 @@ TEST(Query, ExecWithPredicate) {
|
||||
]
|
||||
]
|
||||
])");
|
||||
|
||||
ASSERT_EQ(json, ref);
|
||||
ASSERT_EQ(json.dump(2), ref.dump(2));
|
||||
}
|
||||
|
||||
TEST(Query, ExecWihtoutPredicate) {
|
||||
TEST(Query, ExecWithoutPredicate) {
|
||||
using namespace milvus::query;
|
||||
using namespace milvus::segcore;
|
||||
auto schema = std::make_shared<Schema>();
|
||||
schema->AddField("fakevec", DataType::VECTOR_FLOAT, 16);
|
||||
schema->AddField("fakevec", DataType::VECTOR_FLOAT, 16, MetricType::METRIC_L2);
|
||||
schema->AddField("age", DataType::FLOAT);
|
||||
std::string dsl = R"({
|
||||
"bool": {
|
||||
@ -301,18 +300,49 @@ TEST(Query, ExecWihtoutPredicate) {
|
||||
segment->Search(plan.get(), ph_group_arr.data(), &time, 1, qr);
|
||||
std::vector<std::vector<std::string>> results;
|
||||
int topk = 5;
|
||||
for (int q = 0; q < num_queries; ++q) {
|
||||
std::vector<std::string> result;
|
||||
for (int k = 0; k < topk; ++k) {
|
||||
int index = q * topk + k;
|
||||
result.emplace_back(std::to_string(qr.result_ids_[index]) + "->" +
|
||||
std::to_string(qr.result_distances_[index]));
|
||||
}
|
||||
results.emplace_back(std::move(result));
|
||||
}
|
||||
|
||||
Json json{results};
|
||||
std::cout << json.dump(2);
|
||||
auto json = QueryResultToJson(qr);
|
||||
auto ref = Json::parse(R"(
|
||||
[
|
||||
[
|
||||
[
|
||||
"980486->3.149221",
|
||||
"318367->3.661235",
|
||||
"302798->4.553688",
|
||||
"321424->4.757450",
|
||||
"565529->5.083780"
|
||||
],
|
||||
[
|
||||
"233390->7.931535",
|
||||
"238958->8.109344",
|
||||
"230645->8.439169",
|
||||
"901939->8.658772",
|
||||
"380328->8.731251"
|
||||
],
|
||||
[
|
||||
"749862->3.398494",
|
||||
"701321->3.632437",
|
||||
"897246->3.749835",
|
||||
"750683->3.897577",
|
||||
"105995->4.073595"
|
||||
],
|
||||
[
|
||||
"138274->3.454446",
|
||||
"124548->3.783290",
|
||||
"840855->4.782170",
|
||||
"936719->5.026924",
|
||||
"709627->5.063170"
|
||||
],
|
||||
[
|
||||
"810401->3.926393",
|
||||
"46575->4.054171",
|
||||
"201740->4.274491",
|
||||
"669040->4.399628",
|
||||
"231500->4.831223"
|
||||
]
|
||||
]
|
||||
]
|
||||
)");
|
||||
ASSERT_EQ(json.dump(2), ref.dump(2));
|
||||
}
|
||||
|
||||
TEST(Query, FillSegment) {
|
||||
@ -331,6 +361,9 @@ TEST(Query, FillSegment) {
|
||||
auto param = field->add_type_params();
|
||||
param->set_key("dim");
|
||||
param->set_value("16");
|
||||
auto iparam = field->add_index_params();
|
||||
iparam->set_key("metric_type");
|
||||
iparam->set_value("L2");
|
||||
}
|
||||
|
||||
{
|
||||
@ -392,3 +425,57 @@ TEST(Query, FillSegment) {
|
||||
++std_index;
|
||||
}
|
||||
}
|
||||
|
||||
TEST(Query, ExecWithPredicateBinary) {
|
||||
using namespace milvus::query;
|
||||
using namespace milvus::segcore;
|
||||
auto schema = std::make_shared<Schema>();
|
||||
schema->AddField("fakevec", DataType::VECTOR_BINARY, 512, MetricType::METRIC_Jaccard);
|
||||
schema->AddField("age", DataType::FLOAT);
|
||||
std::string dsl = R"({
|
||||
"bool": {
|
||||
"must": [
|
||||
{
|
||||
"range": {
|
||||
"age": {
|
||||
"GE": -1,
|
||||
"LT": 1
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"vector": {
|
||||
"fakevec": {
|
||||
"metric_type": "Jaccard",
|
||||
"params": {
|
||||
"nprobe": 10
|
||||
},
|
||||
"query": "$0",
|
||||
"topk": 5
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
})";
|
||||
int64_t N = 1000 * 1000;
|
||||
auto dataset = DataGen(schema, N);
|
||||
auto segment = std::make_unique<SegmentSmallIndex>(schema);
|
||||
segment->PreInsert(N);
|
||||
segment->Insert(0, N, dataset.row_ids_.data(), dataset.timestamps_.data(), dataset.raw_);
|
||||
auto vec_ptr = dataset.get_col<uint8_t>(0);
|
||||
|
||||
auto plan = CreatePlan(*schema, dsl);
|
||||
auto num_queries = 5;
|
||||
auto ph_group_raw = CreateBinaryPlaceholderGroupFromBlob(num_queries, 512, vec_ptr.data() + 1024 * 512 / 8);
|
||||
auto ph_group = ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());
|
||||
QueryResult qr;
|
||||
Timestamp time = 1000000;
|
||||
std::vector<const PlaceholderGroup*> ph_group_arr = {ph_group.get()};
|
||||
segment->Search(plan.get(), ph_group_arr.data(), &time, 1, qr);
|
||||
int topk = 5;
|
||||
|
||||
Json json = QueryResultToJson(qr);
|
||||
std::cout << json.dump(2);
|
||||
// ASSERT_EQ(json.dump(2), ref.dump(2));
|
||||
}
|
||||
|
@ -63,7 +63,7 @@ TEST(SegmentCoreTest, NormalDistributionTest) {
|
||||
using namespace milvus::segcore;
|
||||
using namespace milvus::engine;
|
||||
auto schema = std::make_shared<Schema>();
|
||||
schema->AddField("fakevec", DataType::VECTOR_FLOAT, 16);
|
||||
schema->AddField("fakevec", DataType::VECTOR_FLOAT, 16, MetricType::METRIC_L2);
|
||||
schema->AddField("age", DataType::INT32);
|
||||
int N = 1000 * 1000;
|
||||
auto [raw_data, timestamps, uids] = generate_data(N);
|
||||
@ -76,7 +76,7 @@ TEST(SegmentCoreTest, MockTest) {
|
||||
using namespace milvus::segcore;
|
||||
using namespace milvus::engine;
|
||||
auto schema = std::make_shared<Schema>();
|
||||
schema->AddField("fakevec", DataType::VECTOR_FLOAT, 16);
|
||||
schema->AddField("fakevec", DataType::VECTOR_FLOAT, 16, MetricType::METRIC_L2);
|
||||
schema->AddField("age", DataType::INT32);
|
||||
std::vector<char> raw_data;
|
||||
std::vector<Timestamp> timestamps;
|
||||
@ -116,7 +116,7 @@ TEST(SegmentCoreTest, SmallIndex) {
|
||||
using namespace milvus::segcore;
|
||||
using namespace milvus::engine;
|
||||
auto schema = std::make_shared<Schema>();
|
||||
schema->AddField("fakevec", DataType::VECTOR_FLOAT, 16);
|
||||
schema->AddField("fakevec", DataType::VECTOR_FLOAT, 16, MetricType::METRIC_L2);
|
||||
schema->AddField("age", DataType::INT32);
|
||||
int N = 1024 * 1024;
|
||||
auto data = DataGen(schema, N);
|
||||
|
@ -167,7 +167,7 @@ CreateBinaryPlaceholderGroup(int64_t num_queries, int64_t dim, int64_t seed = 42
|
||||
ser::PlaceholderGroup raw_group;
|
||||
auto value = raw_group.add_placeholders();
|
||||
value->set_tag("$0");
|
||||
value->set_type(ser::PlaceholderType::VECTOR_FLOAT);
|
||||
value->set_type(ser::PlaceholderType::VECTOR_BINARY);
|
||||
std::default_random_engine e(seed);
|
||||
for (int i = 0; i < num_queries; ++i) {
|
||||
std::vector<uint8_t> vec;
|
||||
@ -175,7 +175,27 @@ CreateBinaryPlaceholderGroup(int64_t num_queries, int64_t dim, int64_t seed = 42
|
||||
vec.push_back(e());
|
||||
}
|
||||
// std::string line((char*)vec.data(), (char*)vec.data() + vec.size() * sizeof(float));
|
||||
value->add_values(vec.data(), vec.size() * sizeof(float));
|
||||
value->add_values(vec.data(), vec.size());
|
||||
}
|
||||
return raw_group;
|
||||
}
|
||||
|
||||
inline auto
|
||||
CreateBinaryPlaceholderGroupFromBlob(int64_t num_queries, int64_t dim, const uint8_t* ptr) {
|
||||
assert(dim % 8 == 0);
|
||||
namespace ser = milvus::proto::service;
|
||||
ser::PlaceholderGroup raw_group;
|
||||
auto value = raw_group.add_placeholders();
|
||||
value->set_tag("$0");
|
||||
value->set_type(ser::PlaceholderType::VECTOR_BINARY);
|
||||
for (int i = 0; i < num_queries; ++i) {
|
||||
std::vector<uint8_t> vec;
|
||||
for (int d = 0; d < dim / 8; ++d) {
|
||||
vec.push_back(*ptr);
|
||||
++ptr;
|
||||
}
|
||||
// std::string line((char*)vec.data(), (char*)vec.data() + vec.size() * sizeof(float));
|
||||
value->add_values(vec.data(), vec.size());
|
||||
}
|
||||
return raw_group;
|
||||
}
|
||||
|
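A usage sketch mirroring TEST(Query, ExecWithPredicateBinary) earlier in this diff: the blob is expected to hold num_queries contiguous binary codes of dim/8 bytes each. CreateBinaryPlaceholderGroupFromBlob comes from the test-utility header patched above (its path is not shown here), and ParsePlaceholderGroup is assumed to live in milvus::query as used by the tests.

// Assumes the test-utility header above and the query Plan declarations are available.
void
BinaryPlaceholderSketch(const uint8_t* binary_blob, milvus::query::Plan* plan) {
    int64_t num_queries = 5;
    int64_t dim = 512;  // must be a multiple of 8

    auto ph_group_raw = CreateBinaryPlaceholderGroupFromBlob(num_queries, dim, binary_blob);
    // Serialize and hand the group to the query layer, exactly as the tests do.
    auto ph_group = milvus::query::ParsePlaceholderGroup(plan, ph_group_raw.SerializeAsString());
    (void)ph_group;
}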
@ -4,60 +4,126 @@ project(wrapper)
|
||||
set(CMAKE_CXX_STANDARD 17)
|
||||
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
|
||||
|
||||
if (NOT GIT_ARROW_REPO)
|
||||
set(GIT_ARROW_REPO "https://github.com/apache/arrow.git")
|
||||
endif ()
|
||||
message(STATUS "Arrow Repo:" ${GIT_ARROW_REPO})
|
||||
include( ExternalProject )
|
||||
set( ARROW_VERSION "2.0.0" )
|
||||
set( ARROW_SOURCE_URL
|
||||
"https://github.com/apache/arrow/archive/apache-arrow-${ARROW_VERSION}.tar.gz")
|
||||
|
||||
if (NOT GIT_ARROW_TAG)
|
||||
set(GIT_ARROW_TAG "apache-arrow-2.0.0")
|
||||
endif ()
|
||||
message(STATUS "Arrow Tag:" ${GIT_ARROW_TAG})
|
||||
if( CUSTOM_THIRDPARTY_DOWNLOAD_PATH )
|
||||
set( THIRDPARTY_DOWNLOAD_PATH ${CUSTOM_THIRDPARTY_DOWNLOAD_PATH} )
|
||||
else()
|
||||
set( THIRDPARTY_DOWNLOAD_PATH ${CMAKE_BINARY_DIR}/3rdparty_download/download )
|
||||
endif()
|
||||
message( STATUS "Thirdparty downloaded file path: ${THIRDPARTY_DOWNLOAD_PATH}" )
|
||||
|
||||
###################################################################################################
|
||||
# - cmake modules ---------------------------------------------------------------------------------
|
||||
macro( build_arrow )
|
||||
message( STATUS "Building ARROW-${ARROW_VERSION} from source" )
|
||||
|
||||
set(CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake/Modules/" ${CMAKE_MODULE_PATH})
|
||||
set( ARROW_CMAKE_ARGS
|
||||
"-DARROW_WITH_LZ4=OFF"
|
||||
"-DARROW_WITH_ZSTD=OFF"
|
||||
"-DARROW_WITH_BROTLI=OFF"
|
||||
"-DARROW_WITH_SNAPPY=OFF"
|
||||
"-DARROW_WITH_ZLIB=OFF"
|
||||
"-DARROW_BUILD_STATIC=ON"
|
||||
"-DARROW_BUILD_SHARED=OFF"
|
||||
"-DARROW_BOOST_USE_SHARED=OFF"
|
||||
"-DARROW_BUILD_TESTS=OFF"
|
||||
"-DARROW_TEST_MEMCHECK=OFF"
|
||||
"-DARROW_BUILD_BENCHMARKS=OFF"
|
||||
"-DARROW_CUDA=OFF"
|
||||
"-DARROW_JEMALLOC=OFF"
|
||||
"-DARROW_PYTHON=OFF"
|
||||
"-DARROW_BUILD_UTILITIES=OFF"
|
||||
"-DARROW_PARQUET=ON"
|
||||
"-DPARQUET_BUILD_SHARED=OFF"
|
||||
"-DARROW_S3=OFF"
|
||||
"-DCMAKE_VERBOSE_MAKEFILE=ON"
|
||||
"-DCMAKE_INSTALL_PREFIX=${CMAKE_CURRENT_BINARY_DIR}"
|
||||
)
|
||||
|
||||
###################################################################################################
|
||||
# - build arrow ------------------------------------------------------------------------------------
|
||||
ExternalProject_Add(
|
||||
arrow-ep
|
||||
PREFIX ${CMAKE_BINARY_DIR}/3rdparty_download/arrow-subbuild
|
||||
BINARY_DIR arrow-bin
|
||||
DOWNLOAD_DIR ${THIRDPARTY_DOWNLOAD_PATH}
|
||||
INSTALL_DIR ${CMAKE_CURRENT_BINARY_DIR}
|
||||
SOURCE_SUBDIR "cpp"
|
||||
URL ${ARROW_SOURCE_URL}
|
||||
URL_MD5 "37fbe42e7f155dd76cf6f63257373b24"
|
||||
CMAKE_ARGS ${ARROW_CMAKE_ARGS}
|
||||
${EP_LOG_OPTIONS}
|
||||
)
|
||||
|
||||
message(STATUS "BUILDING ARROW")
|
||||
include(ConfigureArrow)
|
||||
ExternalProject_Get_Property( arrow-ep INSTALL_DIR )
|
||||
ExternalProject_Get_Property( arrow-ep BINARY_DIR )
|
||||
set( THRIFT_LOCATION ${BINARY_DIR}/thrift_ep-install )
|
||||
set( UTF8PROC_LOCATION ${BINARY_DIR}/utf8proc_ep-install )
|
||||
|
||||
if (ARROW_FOUND)
|
||||
message(STATUS "Apache Arrow found in ${ARROW_INCLUDE_DIR}")
|
||||
else ()
|
||||
message(FATAL_ERROR "Apache Arrow not found, please check your settings.")
|
||||
endif (ARROW_FOUND)
|
||||
if( NOT IS_DIRECTORY ${INSTALL_DIR}/include )
|
||||
file( MAKE_DIRECTORY "${INSTALL_DIR}/include" )
|
||||
endif()
|
||||
if( NOT IS_DIRECTORY ${THRIFT_LOCATION}/include )
|
||||
file( MAKE_DIRECTORY "${THRIFT_LOCATION}/include" )
|
||||
endif()
|
||||
if( NOT IS_DIRECTORY ${UTF8PROC_LOCATION}/include )
|
||||
file( MAKE_DIRECTORY "${UTF8PROC_LOCATION}/include" )
|
||||
endif()
|
||||
|
||||
add_library(arrow STATIC IMPORTED ${ARROW_LIB})
|
||||
add_library(parquet STATIC IMPORTED ${PARQUET_LIB})
|
||||
add_library(thrift STATIC IMPORTED ${THRIFT_LIB})
|
||||
add_library(utf8proc STATIC IMPORTED ${UTF8PROC_LIB})
|
||||
|
||||
if (ARROW_FOUND)
|
||||
set_target_properties(arrow PROPERTIES IMPORTED_LOCATION ${ARROW_LIB})
|
||||
set_target_properties(parquet PROPERTIES IMPORTED_LOCATION ${PARQUET_LIB})
|
||||
set_target_properties(thrift PROPERTIES IMPORTED_LOCATION ${THRIFT_LIB})
|
||||
set_target_properties(utf8proc PROPERTIES IMPORTED_LOCATION ${UTF8PROC_LIB})
|
||||
endif (ARROW_FOUND)
|
||||
add_library( thrift STATIC IMPORTED )
|
||||
set_target_properties( thrift
|
||||
PROPERTIES
|
||||
IMPORTED_GLOBAL TRUE
|
||||
IMPORTED_LOCATION ${THRIFT_LOCATION}/lib/libthrift.a
|
||||
INTERFACE_INCLUDE_DIRECTORIES ${THRIFT_LOCATION}/include )
|
||||
add_dependencies(thrift arrow-ep)
|
||||
|
||||
###################################################################################################
|
||||
add_library( utf8proc STATIC IMPORTED )
|
||||
set_target_properties( utf8proc
|
||||
PROPERTIES
|
||||
IMPORTED_GLOBAL TRUE
|
||||
IMPORTED_LOCATION ${UTF8PROC_LOCATION}/lib/libutf8proc.a
|
||||
INTERFACE_INCLUDE_DIRECTORIES ${UTF8PROC_LOCATION}/include )
|
||||
add_dependencies(utf8proc arrow-ep)
|
||||
|
||||
include_directories(${ARROW_INCLUDE_DIR})
|
||||
include_directories(${PROJECT_SOURCE_DIR})
|
||||
add_library( arrow STATIC IMPORTED )
|
||||
set_target_properties( arrow
|
||||
PROPERTIES
|
||||
IMPORTED_GLOBAL TRUE
|
||||
IMPORTED_LOCATION ${INSTALL_DIR}/lib/libarrow.a
|
||||
INTERFACE_INCLUDE_DIRECTORIES ${INSTALL_DIR}/include )
|
||||
add_dependencies(arrow arrow-ep )
|
||||
|
||||
add_library( parquet STATIC IMPORTED )
|
||||
set_target_properties( parquet
|
||||
PROPERTIES
|
||||
IMPORTED_GLOBAL TRUE
|
||||
IMPORTED_LOCATION ${INSTALL_DIR}/lib/libparquet.a
|
||||
INTERFACE_INCLUDE_DIRECTORIES ${INSTALL_DIR}/include )
|
||||
add_dependencies(parquet arrow-ep)
|
||||
target_link_libraries(parquet INTERFACE arrow thrift utf8proc)
|
||||
endmacro()
|
||||
|
||||
build_arrow()
|
||||
|
||||
add_library(wrapper STATIC)
|
||||
target_sources(wrapper PUBLIC ParquetWrapper.cpp
|
||||
PayloadStream.cpp)
|
||||
|
||||
target_link_libraries(wrapper PUBLIC parquet arrow thrift utf8proc pthread)
|
||||
target_sources(wrapper PUBLIC ParquetWrapper.cpp PayloadStream.cpp
|
||||
|
||||
)
|
||||
set_target_properties( wrapper PROPERTIES INTERFACE_INCLUDE_DIRECTORIES ${CMAKE_CURRENT_SOURCE_DIR} )
|
||||
target_link_libraries(wrapper PUBLIC parquet pthread)
|
||||
|
||||
if(NOT CMAKE_INSTALL_PREFIX)
|
||||
set(CMAKE_INSTALL_PREFIX ${CMAKE_CURRENT_BINARY_DIR})
|
||||
endif()
|
||||
install(TARGETS wrapper DESTINATION ${CMAKE_INSTALL_PREFIX})
|
||||
install(FILES ${ARROW_LIB} ${PARQUET_LIB} ${THRIFT_LIB} ${UTF8PROC_LIB} DESTINATION ${CMAKE_INSTALL_PREFIX})
|
||||
|
||||
add_subdirectory(test)
|
||||
get_target_property( THRIFT_LIB thrift LOCATION )
|
||||
get_target_property( ARROW_LIB arrow LOCATION )
|
||||
get_target_property( PARQUET_LIB parquet LOCATION )
|
||||
get_target_property( UTF8PROC_LIB utf8proc LOCATION )
|
||||
install(TARGETS wrapper DESTINATION ${CMAKE_INSTALL_PREFIX})
|
||||
install(
|
||||
FILES ${ARROW_LIB} ${PARQUET_LIB} ${THRIFT_LIB} ${UTF8PROC_LIB} DESTINATION ${CMAKE_INSTALL_PREFIX})
|
||||
|
||||
add_subdirectory(test)
|
||||
|
@ -22,11 +22,13 @@ fi
mkdir ${OUTPUT_LIB}

BUILD_TYPE="Debug"
GIT_ARROW_REPO="https://github.com/apache/arrow.git"
GIT_ARROW_TAG="apache-arrow-2.0.0"
CUSTOM_THIRDPARTY_PATH=""

while getopts "a:b:t:h" arg; do
while getopts "a:b:t:h:f:" arg; do
case $arg in
f)
CUSTOM_THIRDPARTY_PATH=$OPTARG
;;
t)
BUILD_TYPE=$OPTARG # BUILD_TYPE
;;
@ -40,6 +42,7 @@ while getopts "a:b:t:h" arg; do
echo "-t: build type(default: Debug)
-a: arrow repo(default: https://github.com/apache/arrow.git)
-b: arrow tag(default: apache-arrow-2.0.0)
-f: custom thirdparty path(default:)
-h: help
"
exit 0
@ -51,8 +54,18 @@ while getopts "a:b:t:h" arg; do
esac
done
echo "BUILD_TYPE: " $BUILD_TYPE
echo "GIT_ARROW_REPO: " $GIT_ARROW_REPO
echo "GIT_ARROW_TAG: " $GIT_ARROW_TAG
echo "CUSTOM_THIRDPARTY_PATH: " $CUSTOM_THIRDPARTY_PATH

pushd ${CMAKE_BUILD}
cmake -DCMAKE_INSTALL_PREFIX=${OUTPUT_LIB} -DCMAKE_BUILD_TYPE=${BUILD_TYPE} -DGIT_ARROW_REPO=${GIT_ARROW_REPO} -DGIT_ARROW_TAG=${GIT_ARROW_TAG} .. && make && make install
CMAKE_CMD="cmake \
-DCMAKE_INSTALL_PREFIX=${OUTPUT_LIB} \
-DCMAKE_BUILD_TYPE=${BUILD_TYPE} \
-DCUSTOM_THIRDPARTY_DOWNLOAD_PATH=${CUSTOM_THIRDPARTY_PATH} .."

${CMAKE_CMD}
echo ${CMAKE_CMD}

if [[ ! ${jobs+1} ]]; then
jobs=$(nproc)
fi
make -j ${jobs} && make install
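This build-script hunk (and the near-identical one further down) threads a new -f option through to CMake as CUSTOM_THIRDPARTY_DOWNLOAD_PATH, so a pre-populated directory of already-downloaded thirdparty archives can be reused instead of being fetched again on every build. A minimal usage sketch; the script name and cache path are illustrative assumptions, not taken from this diff:

    # Point the build at a local cache of thirdparty downloads (hypothetical path).
    ./build.sh -t Release -f "$HOME/.cache/milvus-thirdparty"
    # Without -f, CUSTOM_THIRDPARTY_PATH stays empty and everything is downloaded as before.
    ./build.sh -t Debug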
@ -1,98 +0,0 @@
set(ARROW_ROOT ${CMAKE_BINARY_DIR}/arrow)

set(ARROW_CMAKE_ARGS " -DARROW_WITH_LZ4=OFF"
" -DARROW_WITH_ZSTD=OFF"
" -DARROW_WITH_BROTLI=OFF"
" -DARROW_WITH_SNAPPY=OFF"
" -DARROW_WITH_ZLIB=OFF"
" -DARROW_BUILD_STATIC=ON"
" -DARROW_BUILD_SHARED=OFF"
" -DARROW_BOOST_USE_SHARED=OFF"
" -DARROW_BUILD_TESTS=OFF"
" -DARROW_TEST_MEMCHECK=OFF"
" -DARROW_BUILD_BENCHMARKS=OFF"
" -DARROW_CUDA=OFF"
" -DARROW_JEMALLOC=OFF"
" -DARROW_PYTHON=OFF"
" -DARROW_BUILD_UTILITIES=OFF"
" -DARROW_PARQUET=ON"
" -DPARQUET_BUILD_SHARED=OFF"
" -DARROW_S3=OFF"
" -DCMAKE_VERBOSE_MAKEFILE=ON")

configure_file("${CMAKE_CURRENT_SOURCE_DIR}/cmake/Templates/Arrow.CMakeLists.txt.cmake"
"${ARROW_ROOT}/CMakeLists.txt")

file(MAKE_DIRECTORY "${ARROW_ROOT}/build")
file(MAKE_DIRECTORY "${ARROW_ROOT}/install")

execute_process(
COMMAND ${CMAKE_COMMAND} -G "${CMAKE_GENERATOR}" .
RESULT_VARIABLE ARROW_CONFIG
WORKING_DIRECTORY ${ARROW_ROOT})

if(ARROW_CONFIG)
message(FATAL_ERROR "Configuring Arrow failed: " ${ARROW_CONFIG})
endif(ARROW_CONFIG)

#set(PARALLEL_BUILD -j)
#if($ENV{PARALLEL_LEVEL})
# set(NUM_JOBS $ENV{PARALLEL_LEVEL})
# set(PARALLEL_BUILD "${PARALLEL_BUILD}${NUM_JOBS}")
#endif($ENV{PARALLEL_LEVEL})
set(NUM_JOBS 4)
set(PARALLEL_BUILD "-j${NUM_JOBS}")

if(${NUM_JOBS})
if(${NUM_JOBS} EQUAL 1)
message(STATUS "ARROW BUILD: Enabling Sequential CMake build")
elseif(${NUM_JOBS} GREATER 1)
message(STATUS "ARROW BUILD: Enabling Parallel CMake build with ${NUM_JOBS} jobs")
endif(${NUM_JOBS} EQUAL 1)
else()
message(STATUS "ARROW BUILD: Enabling Parallel CMake build with all threads")
endif(${NUM_JOBS})

execute_process(
COMMAND ${CMAKE_COMMAND} --build .. -- ${PARALLEL_BUILD}
RESULT_VARIABLE ARROW_BUILD
WORKING_DIRECTORY ${ARROW_ROOT}/build)

if(ARROW_BUILD)
message(FATAL_ERROR "Building Arrow failed: " ${ARROW_BUILD})
endif(ARROW_BUILD)

message(STATUS "Arrow installed here: " ${ARROW_ROOT}/install)
set(ARROW_LIBRARY_DIR "${ARROW_ROOT}/install/lib")
set(ARROW_INCLUDE_DIR "${ARROW_ROOT}/install/include")

find_library(ARROW_LIB arrow
NO_DEFAULT_PATH
HINTS "${ARROW_LIBRARY_DIR}")
message(STATUS "Arrow library: " ${ARROW_LIB})

find_library(PARQUET_LIB parquet
NO_DEFAULT_PATH
HINTS "${ARROW_LIBRARY_DIR}")
message(STATUS "Parquet library: " ${PARQUET_LIB})

find_library(THRIFT_LIB thrift
NO_DEFAULT_PATH
HINTS "${ARROW_ROOT}/build/thrift_ep-install/lib")
message(STATUS "Thirft library: " ${THRIFT_LIB})

find_library(UTF8PROC_LIB utf8proc
NO_DEFAULT_PATH
HINTS "${ARROW_ROOT}/build/utf8proc_ep-install/lib")
message(STATUS "utf8proc library: " ${UTF8PROC_LIB})

if(ARROW_LIB AND PARQUET_LIB AND THRIFT_LIB AND UTF8PROC_LIB)
set(ARROW_FOUND TRUE)
endif(ARROW_LIB AND PARQUET_LIB AND THRIFT_LIB AND UTF8PROC_LIB)

# message(STATUS "FlatBuffers installed here: " ${FLATBUFFERS_ROOT})
# set(FLATBUFFERS_INCLUDE_DIR "${FLATBUFFERS_ROOT}/include")
# set(FLATBUFFERS_LIBRARY_DIR "${FLATBUFFERS_ROOT}/lib")

add_definitions(-DARROW_METADATA_V4)
@ -1,30 +0,0 @@
#=============================================================================
# Copyright (c) 2018-2020, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#=============================================================================
cmake_minimum_required(VERSION 3.14...3.17 FATAL_ERROR)

project(wrapper-Arrow)

include(ExternalProject)

ExternalProject_Add(Arrow
GIT_REPOSITORY ${GIT_ARROW_REPO}
GIT_TAG ${GIT_ARROW_TAG}
GIT_SHALLOW true
SOURCE_DIR "${ARROW_ROOT}/arrow"
SOURCE_SUBDIR "cpp"
BINARY_DIR "${ARROW_ROOT}/build"
INSTALL_DIR "${ARROW_ROOT}/install"
CMAKE_ARGS ${ARROW_CMAKE_ARGS} -DCMAKE_INSTALL_PREFIX=${ARROW_ROOT}/install)
@ -10,12 +10,9 @@ FetchContent_MakeAvailable(googletest)
target_link_libraries(wrapper_test
gtest_main
pthread
wrapper
parquet arrow thrift utf8proc pthread
parquet
)

install(TARGETS wrapper_test DESTINATION ${CMAKE_INSTALL_PREFIX})

# Defines `gtest_discover_tests()`.
#include(GoogleTest)
#gtest_discover_tests(milvusd_test)
@ -23,11 +23,13 @@ fi
mkdir ${OUTPUT_LIB}

BUILD_TYPE="Debug"
GIT_ARROW_REPO="https://github.com/apache/arrow.git"
GIT_ARROW_TAG="apache-arrow-2.0.0"
CUSTOM_THIRDPARTY_PATH=""

while getopts "a:b:t:h" arg; do
while getopts "a:b:t:h:f:" arg; do
case $arg in
f)
CUSTOM_THIRDPARTY_PATH=$OPTARG
;;
t)
BUILD_TYPE=$OPTARG # BUILD_TYPE
;;
@ -41,6 +43,7 @@ while getopts "a:b:t:h" arg; do
echo "-t: build type(default: Debug)
-a: arrow repo(default: https://github.com/apache/arrow.git)
-b: arrow tag(default: apache-arrow-2.0.0)
-f: custom thirdparty path(default: "")
-h: help
"
exit 0
@ -52,8 +55,18 @@ while getopts "a:b:t:h" arg; do
esac
done
echo "BUILD_TYPE: " $BUILD_TYPE
echo "GIT_ARROW_REPO: " $GIT_ARROW_REPO
echo "GIT_ARROW_TAG: " $GIT_ARROW_TAG
echo "CUSTOM_THIRDPARTY_PATH: " $CUSTOM_THIRDPARTY_PATH

pushd ${CMAKE_BUILD}
cmake -DCMAKE_INSTALL_PREFIX=${OUTPUT_LIB} -DCMAKE_BUILD_TYPE=${BUILD_TYPE} -DGIT_ARROW_REPO=${GIT_ARROW_REPO} -DGIT_ARROW_TAG=${GIT_ARROW_TAG} ${SRC_DIR} && make && make install
CMAKE_CMD="cmake \
-DCMAKE_INSTALL_PREFIX=${OUTPUT_LIB} \
-DCMAKE_BUILD_TYPE=${BUILD_TYPE} \
-DCUSTOM_THIRDPARTY_DOWNLOAD_PATH=${CUSTOM_THIRDPARTY_PATH} ${SRC_DIR}"

${CMAKE_CMD}
echo ${CMAKE_CMD}

if [[ ! ${jobs+1} ]]; then
jobs=$(nproc)
fi
make -j ${jobs} && make install
@ -260,8 +260,8 @@ def gen_binary_default_fields(auto_id=True):
"fields": [
{"name": "int64", "type": DataType.INT64, "is_primary_key": not auto_id},
{"name": "float", "type": DataType.FLOAT},
{"name": default_binary_vec_field_name, "type": DataType.BINARY_VECTOR, "params": {"dim": default_dim}}
],
{"name": default_binary_vec_field_name, "type": DataType.BINARY_VECTOR, "params": {"dim": default_dim}, "indexes": [{"metric_type": "JACCARD"}]}
],
"segment_row_limit": default_segment_row_limit,
"auto_id": auto_id
}