Add segment threshold factor

Signed-off-by: neza2017 <yefu.chen@zilliz.com>
Author: neza2017, 2020-11-23 11:41:53 +08:00 (committed by yefu.chen)
parent 578e952c60
commit 3277987898
19 changed files with 1135 additions and 182 deletions


@@ -23,21 +23,444 @@ In this section, we introduce the RPCs of milvus service. A brief description of
#### 3.1 Definition Requests

The definition requests cover the collection operations:

* CreateCollection
* DropCollection
* HasCollection
* DescribeCollection
* ShowCollections

and the partition operations:

* CreatePartition
* DropPartition
* HasPartition
* DescribePartition
* ShowPartitions

###### 3.1.1 CreateCollection
**Interface:**
```
rpc CreateCollection(schema.CollectionSchema) returns (common.Status){}
```
**Description:**
Create a collection through a collection schema.
**Parameters:**
- **schema.CollectionSchema**
CollectionSchema struct is shown as follows:
```protobuf
message CollectionSchema {
  string name = 1;
  string description = 2;
  bool autoID = 3;
  repeated FieldSchema fields = 4;
}
```
Collection schema contains all the base information of a collection, including the **collection name**, **description**, **autoID** flag and **fields**. The collection description is written by the database manager to describe the collection. **autoID** determines whether the ID of each row is user-defined: if **autoID** is true, the system generates a unique ID for each entity; if **autoID** is false, the user needs to assign an ID to each entity at insert time.
**Fields** is a list of **FieldSchema**. Each field schema includes the field **name**, **description**, **dataType**, **type_params** and **index_params**.
FieldSchema struct is shown as follows:
```protobuf
message FieldSchema {
  string name = 1;
  string description = 2;
  DataType data_type = 3;
  repeated common.KeyValuePair type_params = 4;
  repeated common.KeyValuePair index_params = 5;
}
```
**Field schema** contains all the base information of a field, including field **name**, **description**, **data_type**, **type_params** and **index_params**. **data_type** is an enum type used to distinguish the different data types; the full enum is listed at the end of this document.
**type_params** contains the detailed information of the data type. For example, a vector field should include its dimension: the pair <dim, 8> makes the field store 8-dimensional vectors.
**index_params**: to speed up searches, you can build an index for a field. **index_params** specifies the detailed index information for that field; details about indexes are given in section 2.2.3.
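For illustration, the snippet below builds such a schema from Python. It is a minimal sketch assuming Python classes generated from the .proto definitions above; the module name `schema_pb2` and the generated API shape are assumptions, while the field names come from the messages shown here.
```python
# Minimal sketch, assuming schema_pb2 is generated from the schema .proto above.
import schema_pb2

coll = schema_pb2.CollectionSchema(
    name="demo_coll",
    description="an example collection",
    autoID=True,                       # let the system assign entity IDs
)
vec_field = coll.fields.add()          # append one FieldSchema
vec_field.name = "embedding"
vec_field.description = "8-dimensional float vectors"
vec_field.data_type = schema_pb2.VECTOR_FLOAT
vec_field.type_params.add(key="dim", value="8")   # the <dim, 8> pair from the text
```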
**Returns:**
- **common.Status**
```protobuf
message Status {
  ErrorCode error_code = 1;
  string reason = 2;
}
```
**Status** represents the server error code. It doesn't contain the gRPC error, only the server-side error code, from which the execution result can be read. **error_code** is an enum type that distinguishes the error types; the full ErrorCode list is shown at the end of this document. The **reason** field is a string describing the error in detail.
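Since every definition request returns this pattern, a client typically checks it once in a helper. A hedged Python sketch, where the gRPC `stub`, the `coll` message from the previous sketch, and the generated `common_pb2` module are assumed names:
```python
# Hedged sketch: checking a returned common.Status.
# common_pb2 and stub are assumed names for the generated module and a
# connected gRPC stub; coll is the CollectionSchema built earlier.
import common_pb2

status = stub.CreateCollection(coll)          # rpc returns common.Status
if status.error_code != common_pb2.SUCCESS:
    raise RuntimeError(f"CreateCollection failed: {status.reason}")
```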
###### 3.1.2 DropCollection
**Interface:**
```
rpc DropCollection(CollectionName) returns (common.Status) {}
```
**Description:**
This method is used to delete a collection.
**Parameters:**
- **CollectionName**
CollectionName struct is shown as follows:
```protobuf
message CollectionName {
  string collection_name = 1;
}
```
**CollectionName** contains only a string named **collection_name**. The collection whose name matches **collection_name** will be deleted.
**Returns:**
- **common.Status**
```protobuf
message Status {
  ErrorCode error_code = 1;
  string reason = 2;
}
```
**Status** represents the server error code. It doesn't contain the gRPC error, only the server-side error code, from which the execution result can be read. **error_code** is an enum type that distinguishes the error types; the full ErrorCode list is shown at the end of this document. The **reason** field is a string describing the error in detail.
###### 3.1.3 HasCollection
**Interface:**
```
rpc HasCollection(CollectionName) returns (BoolResponse) {}
```
**Description:**
This method is used to test collection existence.
**Parameters:**
- **CollectionName**
CollectionName struct is shown as follows:
```protobuf
message CollectionName {
  string collection_name = 1;
}
```
**CollectionName** contains only a string named **collection_name**. The server looks the collection up by **collection_name** and checks whether it exists.
**Returns:**
- **BoolResponse**
```protobuf
message BoolResponse {
  common.Status status = 1;
  bool value = 2;
}
```
**status** represents the server error code. It doesn't contain the gRPC error, only the server-side error code, from which the execution result can be read. **error_code** is an enum type that distinguishes the error types; the full ErrorCode list is shown at the end of this document. The **reason** field is a string describing the error in detail.
**value** indicates whether the collection exists: true if it does, false if it doesn't.
###### 3.1.4 DescribeCollection
**Interface:**
```
rpc DescribeCollection(CollectionName) returns (CollectionDescription) {}
```
**Description:**
This method is used to get collection schema.
**Parameters:**
- **CollectionName**
CollectionName struct is shown as follows:
```protobuf
message CollectionName {
  string collection_name = 1;
}
```
**CollectionName** contains only a string named **collection_name**. The server looks the collection up by **collection_name** and returns its detailed information.
**Returns:**
- **CollectionDescription**
```protobuf
message CollectionDescription {
  common.Status status = 1;
  schema.CollectionSchema schema = 2;
  repeated common.KeyValuePair statistics = 3;
}
```
**status** represents the server error code. It doesn't contain the gRPC error, only the server-side error code, from which the execution result can be read. **error_code** is an enum type that distinguishes the error types; the full ErrorCode list is shown at the end of this document. The **reason** field is a string describing the error in detail.
**schema** is the collection schema, the same as in [CreateCollection](#311-createcollection).
**statistics** is a list of statistics counting various information, such as the number of segments, the number of rows, the number of visits in the last hour, etc.
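Since **statistics** is a repeated KeyValuePair, it is convenient to flatten it into a dict. A hedged Python sketch; `milvus_pb2`, `stub` and the `"row_count"` key are illustrative, not part of this spec:
```python
# Hedged sketch: reading DescribeCollection statistics into a dict.
# milvus_pb2/stub are assumed generated names; "row_count" is illustrative.
import milvus_pb2

desc = stub.DescribeCollection(milvus_pb2.CollectionName(collection_name="demo_coll"))
stats = {kv.key: kv.value for kv in desc.statistics}
print(stats.get("row_count"))
```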
###### 3.1.5 ShowCollections
**Interface:**
```
rpc ShowCollections(common.Empty) returns (StringListResponse) {}
```
**Description:**
This method is used to list the names of all collections.
**Parameters:** None
**Returns:**
- **StringListResponse**
```protobuf
message StringListResponse {
  common.Status status = 1;
  repeated string values = 2;
}
```
**status** represents the server error code. It doesn't contain the gRPC error, only the server-side error code, from which the execution result can be read. **error_code** is an enum type that distinguishes the error types; the full ErrorCode list is shown at the end of this document. The **reason** field is a string describing the error in detail.
**values** is a list containing the names of all collections.
###### 3.1.6 CreatePartition
**Interface:**
```
rpc CreatePartition(PartitionName) returns (common.Status) {}
```
**Description:**
This method is used to create a partition.
**Parameters:**
- **PartitionName**
PartitionName struct is shown as follows:
```protobuf
message PartitionName {
  string partition_name = 1;
}
```
**PartitionName** contains only a string named **partition_name**. The server creates a partition with the given **partition_name**.
**Returns:**
- **common.Status**
```protobuf
message Status {
  ErrorCode error_code = 1;
  string reason = 2;
}
```
**Status** represents the server error code. It doesn't contain the gRPC error, only the server-side error code, from which the execution result can be read. **error_code** is an enum type that distinguishes the error types; the full ErrorCode list is shown at the end of this document. The **reason** field is a string describing the error in detail.
###### 3.1.7 DropPartition
**Interface:**
```
rpc DropPartition(PartitionName) returns (common.Status) {}
```
**Description:**
This method is used to drop a partition.
**Parameters:**
- **PartitionName**
PartitionName struct is shown as follows:
```protobuf
message PartitionName {
  string partition_name = 1;
}
```
**PartitionName** contains only a string named **partition_name**. The partition whose name matches **partition_name** will be deleted.
**Returns:**
- **common.Status**
```protobuf
message Status {
  ErrorCode error_code = 1;
  string reason = 2;
}
```
**Status** represents the server error code. It doesn't contain the gRPC error, only the server-side error code, from which the execution result can be read. **error_code** is an enum type that distinguishes the error types; the full ErrorCode list is shown at the end of this document. The **reason** field is a string describing the error in detail.
###### 3.1.8 HasPartition
**Interface:**
```
rpc HasPartition(PartitionName) returns (BoolResponse) {}
```
**Description:**
This method is used to test partition existence.
**Parameters:**
- **PartitionName**
PartitionName struct is shown as follows:
```protobuf
message PartitionName {
  string partition_name = 1;
}
```
**PartitionName** contains only a string named **partition_name**. The partition whose name matches **partition_name** is checked for existence.
**Returns:**
- **BoolResponse**
```protobuf
message BoolResponse {
  common.Status status = 1;
  bool value = 2;
}
```
**status** represents the server error code. It doesn't contain the gRPC error, only the server-side error code, from which the execution result can be read. **error_code** is an enum type that distinguishes the error types; the full ErrorCode list is shown at the end of this document. The **reason** field is a string describing the error in detail.
**value** indicates whether the partition exists: true if it does, false if it doesn't.
###### 3.1.9 DescribePartition
**Interface:**
```
rpc DescribePartition(PartitionName) returns (PartitionDescription) {}
```
**Description:**
This method is used to show partition information.
**Parameters:**
- **PartitionName**
PartitionName struct is shown as follows:
```protobuf
message PartitionName {
  string partition_name = 1;
}
```
**PartitionName** contains only a string named **partition_name**. The server looks the partition up by **partition_name** and returns its detailed information.
**Returns:**
- **PartitionDescription**
```protobuf
message PartitionDescription {
  common.Status status = 1;
  PartitionName name = 2;
  repeated common.KeyValuePair statistics = 3;
}
```
**status** represents the server error code. It doesn't contain the gRPC error, only the server-side error code, from which the execution result can be read. **error_code** is an enum type that distinguishes the error types; the full ErrorCode list is shown at the end of this document. The **reason** field is a string describing the error in detail.
**name** is the partition name, the same PartitionName as in [CreatePartition](#316-createpartition).
**statistics** is a list of statistics counting various information, such as the number of segments, the number of rows, the number of visits in the last hour, etc.
###### 3.1.10 ShowPartitions
**Interface:**
```
rpc ShowPartitions(CollectionName) returns (StringListResponse) {}
```
**Description:**
This method is used to list all partitions of a collection.
**Parameters:**
- **CollectionName**
CollectionName struct is shown as follows:
```protobuf
message CollectionName {
  string collection_name = 1;
}
```
**CollectionName** contains only a string named **collection_name**. All partitions of the collection whose name matches **collection_name** will be listed.
**Returns:**
- **StringListResponse**
```protobuf
message StringListResponse {
  common.Status status = 1;
  repeated string values = 2;
}
```
**status** represents the server error code. It doesn't contain the gRPC error, only the server-side error code, from which the execution result can be read. **error_code** is an enum type that distinguishes the error types; the full ErrorCode list is shown at the end of this document. The **reason** field is a string describing the error in detail.
**values** is a list containing the names of all partitions of the collection.
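A hedged end-to-end usage sketch for this call, with the same assumed `stub`, `milvus_pb2` and `common_pb2` names as before:
```python
# Hedged sketch: listing the partitions of a collection.
resp = stub.ShowPartitions(milvus_pb2.CollectionName(collection_name="demo_coll"))
if resp.status.error_code == common_pb2.SUCCESS:
    print(list(resp.values))       # all partition names of demo_coll
```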


@@ -1,2 +1,36 @@
## Appendix D. Error Code
**ErrorCode**
```protobuf
enum ErrorCode {
  SUCCESS = 0;
  UNEXPECTED_ERROR = 1;
  CONNECT_FAILED = 2;
  PERMISSION_DENIED = 3;
  COLLECTION_NOT_EXISTS = 4;
  ILLEGAL_ARGUMENT = 5;
  ILLEGAL_DIMENSION = 7;
  ILLEGAL_INDEX_TYPE = 8;
  ILLEGAL_COLLECTION_NAME = 9;
  ILLEGAL_TOPK = 10;
  ILLEGAL_ROWRECORD = 11;
  ILLEGAL_VECTOR_ID = 12;
  ILLEGAL_SEARCH_RESULT = 13;
  FILE_NOT_FOUND = 14;
  META_FAILED = 15;
  CACHE_FAILED = 16;
  CANNOT_CREATE_FOLDER = 17;
  CANNOT_CREATE_FILE = 18;
  CANNOT_DELETE_FOLDER = 19;
  CANNOT_DELETE_FILE = 20;
  BUILD_INDEX_ERROR = 21;
  ILLEGAL_NLIST = 22;
  ILLEGAL_METRIC_TYPE = 23;
  OUT_OF_MEMORY = 24;
  // internal error code.
  DD_REQUEST_RACE = 1000;
}
```
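For logging, the numeric code can be mapped back to its symbolic name through the generated enum wrapper. A hedged sketch, assuming `common_pb2` is the module generated from this proto:
```python
# Hedged sketch: resolve a numeric error_code to its enum name for logging.
import common_pb2

name = common_pb2.ErrorCode.Name(status.error_code)   # e.g. "COLLECTION_NOT_EXISTS"
print(f"server returned {name}: {status.reason}")
```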


@@ -27,7 +27,487 @@ type FieldSchema struct {
###### 2.2.1 Data Types
**DataType**
```protobuf
enum DataType {
  NONE = 0;
  BOOL = 1;
  INT8 = 2;
  INT16 = 3;
  INT32 = 4;
  INT64 = 5;
  FLOAT = 10;
  DOUBLE = 11;
  STRING = 20;
  VECTOR_BINARY = 100;
  VECTOR_FLOAT = 101;
}
```
###### 2.2.2 Type Params
###### 2.2.3 Index Params
# Intro to Index
For more detailed information about indexes, please refer to [Milvus documentation index chapter.](https://milvus.io/docs/v0.11.0/index.md)
To learn how to choose an appropriate index for your application scenarios, please read [How to Select an Index in Milvus](https://medium.com/@milvusio/how-to-choose-an-index-in-milvus-4f3d15259212).
To learn how to choose an appropriate index for a metric, see [Distance Metrics](https://www.milvus.io/docs/v0.11.0/metric.md).
Different index types use different index params for construction and query. All index params are represented as a map; this doc shows them as Python dicts.
[IVF_FLAT](#IVF_FLAT)
[BIN_IVF_FLAT](#BIN_IVF_FLAT)
[IVF_PQ](#IVF_PQ)
[IVF_SQ8](#IVF_SQ8)
[IVF_SQ8_HYBRID](#IVF_SQ8_HYBRID)
[ANNOY](#ANNOY)
[HNSW](#HNSW)
[RHNSW_PQ](#RHNSW_PQ)
[RHNSW_SQ](#RHNSW_SQ)
[NSG](#NSG)
## IVF_FLAT
**IVF** (*Inverted File*) is an index type based on quantization. It divides the points in space into `nlist`
units by a clustering method. When searching vectors, it compares the distances between the target vector
and the centers of all units, selects the `nprobe` nearest units, and then compares all the vectors
in these selected units to get the final result.
IVF_FLAT is the most basic IVF index; the encoded data stored in each unit is consistent with the original data.
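To make the roles of `nlist` and `nprobe` concrete, here is an illustrative numpy sketch of the IVF search idea; it shows only the two-stage probe-then-scan pattern, not Milvus internals:
```python
# Illustrative sketch of IVF search: probe the nprobe nearest cluster
# centers, then scan only the vectors stored in those clusters.
import numpy as np

def ivf_search(query, centers, clusters, nprobe, topk):
    d2c = np.linalg.norm(centers - query, axis=1)   # distance to each unit center
    probe = np.argsort(d2c)[:nprobe]                # nprobe nearest units
    ids, vec_blocks = [], []
    for c in probe:
        cluster_ids, cluster_vecs = clusters[c]     # (ids, vectors) of one unit
        ids.extend(cluster_ids)
        vec_blocks.append(cluster_vecs)
    vecs = np.vstack(vec_blocks)
    dists = np.linalg.norm(vecs - query, axis=1)    # exact scan inside probed units
    order = np.argsort(dists)[:topk]
    return [ids[i] for i in order], dists[order]
```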
- building parameters:
  **nlist**: Number of cluster units.
```python
# IVF_FLAT
{
    "index_type": "IVF_FLAT",
    "metric_type": "L2",      # one of L2, IP
    # Special for IVF_FLAT
    "nlist": 100              # int. 1~65536
}
```
- search parameters:
  **nprobe**: Number of inverted file cells to probe.
```python
# IVF_FLAT
{
    "topk": top_k,
    "query": queries,
    "metric_type": "L2",      # one of L2, IP
    # Special for IVF_FLAT
    "nprobe": 8               # int. 1~nlist(cpu), 1~min[2048, nlist](gpu)
}
```
## BIN_IVF_FLAT
**BIN_IVF_FLAT** is a binary variant of IVF_FLAT.
- building parameters:
  **nlist**: Number of cluster units.
```python
# BIN_IVF_FLAT
{
    "index_type": "BIN_IVF_FLAT",
    "metric_type": "jaccard", # one of jaccard, hamming, tanimoto
    # Special for BIN_IVF_FLAT
    "nlist": 100              # int. 1~65536
}
```
- search parameters:
  **nprobe**: Number of inverted file cells to probe.
```python
# BIN_IVF_FLAT
{
    "topk": top_k,
    "query": queries,
    "metric_type": "jaccard", # one of jaccard, hamming, tanimoto
    # Special for BIN_IVF_FLAT
    "nprobe": 8               # int. 1~nlist(cpu), 1~min[2048, nlist](gpu)
}
```
## IVF_PQ
**PQ** (*Product Quantization*) uniformly decomposes the original high-dimensional vector space into
Cartesian products of `m` low-dimensional vector spaces, and then quantizes each of these low-dimensional
spaces. Instead of calculating the distances between the target vector and the centers of all units,
product quantization only computes distances between the target vector and the clustering centers
of each low-dimensional space, greatly reducing the time and space complexity of the algorithm.
IVF_PQ performs IVF clustering and then product-quantizes the vectors. Its index file is even
smaller than IVF_SQ8's, but it also causes a loss of accuracy during search.
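The core PQ trick is a per-sub-space distance lookup table. An illustrative numpy sketch of asymmetric PQ distance computation, assuming `dim` is divisible by `m` and 256 centroids per sub-space; this is not the Milvus implementation:
```python
# Illustrative sketch of PQ distances: split dim into m sub-spaces, encode
# each sub-vector as one of 256 centroid ids, then sum per-sub-space table
# lookups instead of computing full-dimensional distances.
import numpy as np

def pq_distances(query, codes, codebooks):
    # codes: (n, m) uint8 codes; codebooks: (m, 256, dim // m) centroids
    m = codes.shape[1]
    sub = query.reshape(m, -1)                       # split query into m parts
    # table[j, k] = squared distance from query part j to centroid k
    table = ((codebooks - sub[:, None, :]) ** 2).sum(axis=2)
    # sum the looked-up partial distances over the m sub-spaces
    return table[np.arange(m), codes].sum(axis=1)    # shape (n,)
```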
- building parameters:
  **nlist**: Number of cluster units.
  **m**: Number of factors of product quantization. **CPU-only** Milvus: `m` must be a divisor of `dim`; **GPU-enabled** Milvus: `m` ∈ {1, 2, 3, 4, 8, 12, 16, 20, 24, 28, 32, 40, 48, 56, 64, 96}, and (dim / m) ∈ {1, 2, 3, 4, 6, 8, 10, 12, 16, 20, 24, 28, 32}, with (`m` × 1024) ≥ `MaxSharedMemPerBlock` of your graphics card.
```python
# IVF_PQ
{
    "index_type": "IVF_PQ",
    "metric_type": "L2",      # one of L2, IP
    # Special for IVF_PQ
    "nlist": 100,             # int. 1~65536
    "m": 8
}
```
- search parameters:
  **nprobe**: Number of inverted file cells to probe.
```python
# IVF_PQ
{
    "topk": top_k,
    "query": queries,
    "metric_type": "L2",      # one of L2, IP
    # Special for IVF_PQ
    "nprobe": 8               # int. 1~nlist(cpu), 1~min[2048, nlist](gpu)
}
```
## IVF_SQ8
**IVF_SQ8** performs scalar quantization on each vector placed in a unit, on top of IVF. Scalar quantization
converts each dimension of the original vector from a 4-byte floating-point number to a 1-byte unsigned integer,
so the IVF_SQ8 index file occupies much less space than the IVF_FLAT index file.
However, scalar quantization causes a loss of accuracy when searching vectors.
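The space saving comes entirely from that 4-byte-to-1-byte mapping. A minimal numpy sketch of per-dimension scalar quantization, illustrative only; the actual codec is internal to the index:
```python
# Minimal sketch of SQ8-style scalar quantization: map each float32
# dimension to one uint8 using per-dimension min/max ranges.
import numpy as np

def sq8_encode(vectors):                     # vectors: (n, dim) float32
    vmin = vectors.min(axis=0)
    vmax = vectors.max(axis=0)
    scale = (vmax - vmin) / 255.0
    codes = np.round((vectors - vmin) / np.maximum(scale, 1e-12)).astype(np.uint8)
    return codes, vmin, scale                # ~4x smaller than float32

def sq8_decode(codes, vmin, scale):
    # lossy reconstruction; this is where the accuracy loss comes from
    return codes.astype(np.float32) * scale + vmin
```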
- building parameters:
  **nlist**: Number of cluster units.
```python
# IVF_SQ8
{
    "index_type": "IVF_SQ8",
    "metric_type": "L2",      # one of L2, IP
    # Special for IVF_SQ8
    "nlist": 100              # int. 1~65536
}
```
- search parameters:
  **nprobe**: Number of inverted file cells to probe.
```python
# IVF_SQ8
{
    "topk": top_k,
    "query": queries,
    "metric_type": "L2",      # one of L2, IP
    # Special for IVF_SQ8
    "nprobe": 8               # int. 1~nlist(cpu), 1~min[2048, nlist](gpu)
}
```
## IVF_SQ8_HYBRID
**IVF_SQ8_HYBRID** is an optimized version of IVF_SQ8 that requires both CPU and GPU to work. Unlike IVF_SQ8, IVF_SQ8H uses a GPU-based
coarse quantizer, which greatly reduces the time to quantize.
IVF_SQ8H is an IVF_SQ8 index that optimizes query execution.
The query method is as follows:
- If `nq` ≥ `gpu_search_threshold`, the GPU handles the entire query task.
- If `nq` < `gpu_search_threshold`, the GPU handles the task of retrieving the `nprobe` nearest units in the IVF
index file, and the CPU handles the rest.
- building parameters:
  **nlist**: Number of cluster units.
```python
# IVF_SQ8_HYBRID
{
    "index_type": "IVF_SQ8_HYBRID",
    "metric_type": "L2",      # one of L2, IP
    # Special for IVF_SQ8_HYBRID
    "nlist": 100              # int. 1~65536
}
```
- search parameters:
  **nprobe**: Number of inverted file cells to probe.
```python
# IVF_SQ8_HYBRID
{
    "topk": top_k,
    "query": queries,
    "metric_type": "L2",      # one of L2, IP
    # Special for IVF_SQ8_HYBRID
    "nprobe": 8               # int. 1~nlist(cpu), 1~min[2048, nlist](gpu)
}
```
## ANNOY
**ANNOY** (*Approximate Nearest Neighbors Oh Yeah*) is an index that uses hyperplanes to divide a
high-dimensional space into multiple subspaces, which are then stored in a tree structure.
When searching for vectors, ANNOY follows the tree structure to find subspaces closer to the target vector,
and then compares all the vectors in these subspaces (the number of vectors compared is at least
`search_k`) to obtain the final result. Obviously, when the target vector is close to the edge of
a certain subspace, it is sometimes necessary to greatly increase the number of searched subspaces to obtain
a high recall rate. Therefore, ANNOY uses `n_trees` different ways to divide the whole space, and searches
all the divisions simultaneously to reduce the probability that the target vector always lies at the edge of a subspace.
- building parameters:
  **n_trees**: The number of ways to divide the space.
```python
# ANNOY
{
    "index_type": "ANNOY",
    "metric_type": "L2",      # one of L2, IP
    # Special for ANNOY
    "n_trees": 8              # int. 1~1024
}
```
- search parameters:
  **search_k**: The number of nodes to search. -1 means 5% of the whole data.
```python
# ANNOY
{
    "topk": top_k,
    "query": queries,
    "metric_type": "L2",      # one of L2, IP
    # Special for ANNOY
    "search_k": -1            # int. {-1} U [top_k, n*n_trees], n is the number of vectors
}
```
## HNSW
**HNSW** (*Hierarchical Navigable Small World Graph*) is a graph-based indexing algorithm. It builds a
multi-layer navigation graph according to certain rules. In this structure, the upper
layers are sparser and the distances between nodes are larger; the lower layers are denser and
the distances between nodes are smaller. The search starts from the uppermost layer, finds the node closest
to the target in this layer, and then enters the next layer to begin another search. After multiple iterations,
it can quickly approach the target position.
In order to improve performance, HNSW limits the maximum degree of nodes on each layer of the graph to `M`.
In addition, you can use `efConstruction` (when building the index) or `ef` (when searching targets) to specify a search range.
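A highly simplified sketch of the layered greedy descent described above; illustrative only, since real HNSW keeps an `ef`-sized candidate set rather than a single current node:
```python
# Simplified sketch of HNSW-style descent: greedily walk each layer toward
# the query, then drop to the next (denser) layer.
def hnsw_descend(entry, query, layers, dist):
    # layers: adjacency dicts from the top (sparse) to the bottom (dense) layer
    cur = entry
    for graph in layers:
        improved = True
        while improved:                      # greedy walk within one layer
            improved = False
            for neighbor in graph[cur]:
                if dist(neighbor, query) < dist(cur, query):
                    cur, improved = neighbor, True
    return cur                               # entry point for the bottom ef-search
```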
- building parameters:
  **M**: Maximum degree of the node.
  **efConstruction**: Takes effect during index construction; controls the candidate scope.
```python
# HNSW
{
    "index_type": "HNSW",
    "metric_type": "L2",      # one of L2, IP
    # Special for HNSW
    "M": 16,                  # int. 4~64
    "efConstruction": 40      # int. 8~512
}
```
- search parameters:
  **ef**: Controls the search scope at query time; should be larger than `top_k`.
```python
# HNSW
{
    "topk": top_k,
    "query": queries,
    "metric_type": "L2",      # one of L2, IP
    # Special for HNSW
    "ef": 64                  # int. top_k~32768
}
```
## RHNSW_PQ
**RHNSW_PQ** is a variant index type combining PQ and HNSW. It first applies PQ to quantize the vectors,
then builds an HNSW graph over the PQ-quantized results to form the index.
- building parameters:
  **M**: Maximum degree of the node.
  **efConstruction**: Takes effect during index construction.
  **PQM**: m for PQ.
```python
# RHNSW_PQ
{
    "index_type": "RHNSW_PQ",
    "metric_type": "L2",
    # Special for RHNSW_PQ
    "M": 16,                  # int. 4~64
    "efConstruction": 40,     # int. 8~512
    "PQM": 8,                 # int. CPU only. dim must be divisible by PQM
}
```
- search parameters:
  **ef**: Controls the search scope at query time; should be larger than `top_k`.
```python
# RHNSW_PQ
{
    "topk": top_k,
    "query": queries,
    "metric_type": "L2",      # one of L2, IP
    # Special for RHNSW_PQ
    "ef": 64                  # int. top_k~32768
}
```
## RHNSW_SQ
**RHNSW_SQ** is a variant index type combining SQ and HNSW. It first applies SQ to quantize the vectors, then builds an HNSW graph over the SQ-quantized results to form the index.
- building parameters:
  **M**: Maximum degree of the node.
  **efConstruction**: Takes effect during index construction; controls the candidate scope.
```python
# RHNSW_SQ
{
    "index_type": "RHNSW_SQ",
    "metric_type": "L2",      # one of L2, IP
    # Special for RHNSW_SQ
    "M": 16,                  # int. 4~64
    "efConstruction": 40      # int. 8~512
}
```
- search parameters:
  **ef**: Controls the search scope at query time; should be larger than `top_k`.
```python
# RHNSW_SQ
{
    "topk": top_k,
    "query": queries,
    "metric_type": "L2",      # one of L2, IP
    # Special for RHNSW_SQ
    "ef": 64                  # int. top_k~32768
}
```
## NSG
**NSG** (*Refined Navigating Spreading-out Graph*) is a graph-based indexing algorithm. It sets the center
position of the whole graph as a navigation point, and then uses a specific edge-selection strategy to control
the out-degree of each point (less than or equal to `out_degree`). Therefore, it can reduce memory usage and
quickly locate positions near the target when searching vectors.
The graph construction process of NSG is as follows:
1. Find `knng` nearest neighbors for each point.
2. Iterate at least `search_length` times based on the `knng` nearest-neighbor nodes to select `candidate_pool_size` possible nearest-neighbor nodes.
3. Construct the out-edges of each point from the selected `candidate_pool_size` nodes according to the edge-selection strategy.
The query process is similar to the graph-building process: it starts from the navigation point and iterates at least `search_length` times to get the final result.
- building parameters:
  **search_length**: Number of query iterations.
  **out_degree**: Maximum out-degree of the node.
  **candidate_pool_size**: Candidate pool size of the node.
  **knng**: Number of nearest neighbors.
```python
# NSG
{
    "index_type": "NSG",
    "metric_type": "L2",
    # Special for NSG
    "search_length": 60,        # int. 10~300
    "out_degree": 30,           # int. 5~300
    "candidate_pool_size": 300, # int. 50~1000
    "knng": 50                  # int. 5~300
}
```
- search parameters:
  **search_length**: Number of query iterations.
```python
# NSG
{
    "topk": top_k,
    "query": queries,
    "metric_type": "L2",      # one of L2, IP
    # Special for NSG
    "search_length": 100      # int. 10~300
}
```


@@ -8,6 +8,7 @@ set(MILVUS_QUERY_SRCS
    visitors/ShowExprVisitor.cpp
    visitors/ExecExprVisitor.cpp
    Plan.cpp
    Search.cpp
)
add_library(milvus_query ${MILVUS_QUERY_SRCS})
target_link_libraries(milvus_query milvus_proto)


@@ -0,0 +1,107 @@
#include "Search.h"
#include <knowhere/index/vector_index/adapter/VectorAdapter.h>
#include <knowhere/index/vector_index/VecIndexFactory.h>
#include "segcore/Reduce.h"
#include <faiss/utils/distances.h>
#include "utils/tools.h"

namespace milvus::query {

static faiss::ConcurrentBitsetPtr
create_bitmap_view(std::optional<const BitmapSimple*> bitmaps_opt, int64_t chunk_id) {
    if (!bitmaps_opt.has_value()) {
        return nullptr;
    }
    auto& bitmaps = *bitmaps_opt.value();
    auto& src_vec = bitmaps.at(chunk_id);
    auto dst = std::make_shared<faiss::ConcurrentBitset>(src_vec.size());
    boost::to_block_range(src_vec, dst->mutable_data());
    return dst;
}

using namespace segcore;

Status
QueryBruteForceImpl(const SegmentSmallIndex& segment,
                    const query::QueryInfo& info,
                    const float* query_data,
                    int64_t num_queries,
                    Timestamp timestamp,
                    std::optional<const BitmapSimple*> bitmaps_opt,
                    QueryResult& results) {
    auto& record = segment.get_insert_record();
    auto& schema = segment.get_schema();
    auto& indexing_record = segment.get_indexing_record();
    // step 1: binary search to find the barrier of the snapshot
    auto ins_barrier = get_barrier(record, timestamp);
    auto max_chunk = upper_div(ins_barrier, DefaultElementPerChunk);
    // auto del_barrier = get_barrier(deleted_record_, timestamp);
#if 0
    auto bitmap_holder = get_deleted_bitmap(del_barrier, timestamp, ins_barrier);
    Assert(bitmap_holder);
    auto bitmap = bitmap_holder->bitmap_ptr;
#endif
    // step 2.1: get meta
    // step 2.2: get which vector field to search
    auto vecfield_offset_opt = schema.get_offset(info.field_id_);
    Assert(vecfield_offset_opt.has_value());
    auto vecfield_offset = vecfield_offset_opt.value();
    auto& field = schema[vecfield_offset];
    auto vec_ptr = record.get_vec_entity<float>(vecfield_offset);
    Assert(field.get_data_type() == DataType::VECTOR_FLOAT);
    auto dim = field.get_dim();
    auto topK = info.topK_;
    auto total_count = topK * num_queries;
    // TODO: optimize
    // step 3: small indexing search
    std::vector<int64_t> final_uids(total_count, -1);
    std::vector<float> final_dis(total_count, std::numeric_limits<float>::max());
    auto max_indexed_id = indexing_record.get_finished_ack();
    const auto& indexing_entry = indexing_record.get_indexing(vecfield_offset);
    auto search_conf = indexing_entry.get_search_conf(topK);
    for (int chunk_id = 0; chunk_id < max_indexed_id; ++chunk_id) {
        auto indexing = indexing_entry.get_indexing(chunk_id);
        auto src_data = vec_ptr->get_chunk(chunk_id).data();
        auto dataset = knowhere::GenDataset(num_queries, dim, src_data);
        auto bitmap_view = create_bitmap_view(bitmaps_opt, chunk_id);
        auto ans = indexing->Query(dataset, search_conf, bitmap_view);
        auto dis = ans->Get<float*>(milvus::knowhere::meta::DISTANCE);
        auto uids = ans->Get<int64_t*>(milvus::knowhere::meta::IDS);
        merge_into(num_queries, topK, final_dis.data(), final_uids.data(), dis, uids);
    }
    // step 4: brute force search where small indexing is unavailable
    for (int chunk_id = max_indexed_id; chunk_id < max_chunk; ++chunk_id) {
        std::vector<int64_t> buf_uids(total_count, -1);
        std::vector<float> buf_dis(total_count, std::numeric_limits<float>::max());
        faiss::float_maxheap_array_t buf = {(size_t)num_queries, (size_t)topK, buf_uids.data(), buf_dis.data()};
        auto src_data = vec_ptr->get_chunk(chunk_id).data();
        auto nsize =
            chunk_id != max_chunk - 1 ? DefaultElementPerChunk : ins_barrier - chunk_id * DefaultElementPerChunk;
        auto bitmap_view = create_bitmap_view(bitmaps_opt, chunk_id);
        faiss::knn_L2sqr(query_data, src_data, dim, num_queries, nsize, &buf, bitmap_view);
        merge_into(num_queries, topK, final_dis.data(), final_uids.data(), buf_dis.data(), buf_uids.data());
    }
    // step 5: convert offset to uids
    for (auto& id : final_uids) {
        if (id == -1) {
            continue;
        }
        id = record.uids_[id];
    }
    results.result_ids_ = std::move(final_uids);
    results.result_distances_ = std::move(final_dis);
    results.topK_ = topK;
    results.num_queries_ = num_queries;
    return Status::OK();
}
}  // namespace milvus::query
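Both loops above fold per-chunk partial results into the running top-k via merge_into. A rough Python/numpy sketch of that merge semantics, illustrative only; the segcore implementation works in place on raw buffers:
```python
# Sketch of a per-chunk top-k merge: concatenate the running result with a
# chunk's partial result, then keep the topk smallest distances per query.
import numpy as np

def merge_into(final_dis, final_ids, dis, ids, topk):
    # all arrays are (num_queries, topk)
    cat_dis = np.concatenate([final_dis, dis], axis=1)
    cat_ids = np.concatenate([final_ids, ids], axis=1)
    order = np.argsort(cat_dis, axis=1)[:, :topk]    # topk best per query
    return (np.take_along_axis(cat_dis, order, axis=1),
            np.take_along_axis(cat_ids, order, axis=1))
```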


@@ -0,0 +1,19 @@
#pragma once
#include <optional>
#include "segcore/SegmentSmallIndex.h"
#include <boost/dynamic_bitset.hpp>

namespace milvus::query {
using BitmapChunk = boost::dynamic_bitset<>;
using BitmapSimple = std::deque<BitmapChunk>;

// note: c++17 doesn't support optional references
Status
QueryBruteForceImpl(const segcore::SegmentSmallIndex& segment,
                    const QueryInfo& info,
                    const float* query_data,
                    int64_t num_queries,
                    Timestamp timestamp,
                    std::optional<const BitmapSimple*> bitmap_opt,
                    segcore::QueryResult& results);
}  // namespace milvus::query


@@ -4,6 +4,7 @@
#include "segcore/SegmentSmallIndex.h"
#include <optional>
#include "query/ExprImpl.h"
#include "boost/dynamic_bitset.hpp"
#include "ExprVisitor.h"
namespace milvus::query {
@@ -22,7 +23,7 @@ class ExecExprVisitor : ExprVisitor {
    visit(RangeExpr& expr) override;

 public:
    using RetType = std::vector<std::vector<bool>>;
    using RetType = std::deque<boost::dynamic_bitset<>>;
    explicit ExecExprVisitor(segcore::SegmentSmallIndex& segment) : segment_(segment) {
    }
    RetType


@@ -1,6 +1,7 @@
#include "segcore/SegmentSmallIndex.h"
#include <optional>
#include "query/ExprImpl.h"
#include "boost/dynamic_bitset.hpp"
#include "query/generated/ExecExprVisitor.h"
namespace milvus::query {
@@ -10,7 +11,7 @@ namespace milvus::query {
namespace impl {
class ExecExprVisitor : ExprVisitor {
 public:
    using RetType = std::vector<std::vector<bool>>;
    using RetType = std::deque<boost::dynamic_bitset<>>;
    explicit ExecExprVisitor(segcore::SegmentSmallIndex& segment) : segment_(segment) {
    }
    RetType
@@ -66,7 +67,7 @@ ExecExprVisitor::ExecRangeVisitorImpl(RangeExprImpl<T>& expr, Func func) -> RetT
    auto& field_meta = schema[field_offset];
    auto vec_ptr = records.get_scalar_entity<T>(field_offset);
    auto& vec = *vec_ptr;
    std::vector<std::vector<bool>> results(vec.chunk_size());
    RetType results(vec.chunk_size());
    for (auto chunk_id = 0; chunk_id < vec.chunk_size(); ++chunk_id) {
        auto& result = results[chunk_id];
        result.resize(segcore::DefaultElementPerChunk);


@@ -3,6 +3,8 @@
#include "segcore/SegmentBase.h"
#include "query/generated/ExecPlanNodeVisitor.h"
#include "segcore/SegmentSmallIndex.h"
#include "query/generated/ExecExprVisitor.h"
#include "query/Search.h"
namespace milvus::query {
@@ -49,7 +51,12 @@ ExecPlanNodeVisitor::visit(FloatVectorANNS& node) {
    auto& ph = placeholder_group_.at(0);
    auto src_data = ph.get_blob<float>();
    auto num_queries = ph.num_of_queries_;
    segment->QueryBruteForceImpl(node.query_info_, src_data, num_queries, timestamp_, ret);
    if (node.predicate_.has_value()) {
        auto bitmap = ExecExprVisitor(*segment).call_child(*node.predicate_.value());
        auto ptr = &bitmap;
        QueryBruteForceImpl(*segment, node.query_info_, src_data, num_queries, timestamp_, ptr, ret);
    } else {
        QueryBruteForceImpl(*segment, node.query_info_, src_data, num_queries, timestamp_, std::nullopt, ret);
    }
    ret_ = ret;
}


@@ -37,6 +37,5 @@ class AckResponder {
    std::shared_mutex mutex_;
    std::set<int64_t> acks_ = {0};
    std::atomic<int64_t> minimum_ = 0;
    // std::atomic<int64_t> maximum_ = 0;
};
}  // namespace milvus::segcore


@@ -4,6 +4,7 @@
#include "common/Schema.h"
#include "knowhere/index/vector_index/IndexIVF.h"
#include <memory>
#include "segcore/Record.h"
namespace milvus::segcore {

View File

@@ -67,7 +67,7 @@ class IndexingRecord {
    // concurrent
    int64_t
    get_finished_ack() {
    get_finished_ack() const {
        return finished_ack_.GetAck();
    }


@@ -2,6 +2,7 @@
#include "common/Schema.h"
#include "ConcurrentVector.h"
#include "AckResponder.h"
#include "segcore/Record.h"
namespace milvus::segcore {
struct InsertRecord {


@@ -0,0 +1,21 @@
#pragma once
#include "common/Schema.h"

namespace milvus::segcore {

template <typename RecordType>
inline int64_t
get_barrier(const RecordType& record, Timestamp timestamp) {
    auto& vec = record.timestamps_;
    int64_t beg = 0;
    int64_t end = record.ack_responder_.GetAck();
    while (beg < end) {
        auto mid = (beg + end) / 2;
        if (vec[mid] < timestamp) {
            beg = mid + 1;
        } else {
            end = mid;
        }
    }
    return beg;
}
}  // namespace milvus::segcore
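get_barrier is a plain lower_bound over the acknowledged, monotonically timestamped rows; in Python terms:
```python
# Equivalent of get_barrier using the standard library: the number of
# acknowledged rows whose timestamp is strictly less than ts.
import bisect

def get_barrier(timestamps, ack, ts):
    return bisect.bisect_left(timestamps, ts, 0, ack)
```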


@@ -52,10 +52,7 @@ class SegmentBase {
    virtual Status
    Delete(int64_t reserved_offset, int64_t size, const int64_t* row_ids, const Timestamp* timestamps) = 0;

    // query contains metadata of
    virtual Status
    QueryDeprecated(query::QueryDeprecatedPtr query, Timestamp timestamp, QueryResult& results) = 0;

 public:
    virtual Status
    Search(const query::Plan* Plan,
           const query::PlaceholderGroup* placeholder_groups[],


@@ -219,23 +219,6 @@ SegmentNaive::Delete(int64_t reserved_begin, int64_t size, const int64_t* uids_r
    // return Status::OK();
}

template <typename RecordType>
int64_t
get_barrier(const RecordType& record, Timestamp timestamp) {
    auto& vec = record.timestamps_;
    int64_t beg = 0;
    int64_t end = record.ack_responder_.GetAck();
    while (beg < end) {
        auto mid = (beg + end) / 2;
        if (vec[mid] < timestamp) {
            beg = mid + 1;
        } else {
            end = mid;
        }
    }
    return beg;
}

Status
SegmentNaive::QueryImpl(query::QueryDeprecatedPtr query_info, Timestamp timestamp, QueryResult& result) {
    auto ins_barrier = get_barrier(record_, timestamp);


@@ -41,10 +41,12 @@ class SegmentNaive : public SegmentBase {
    Status
    Delete(int64_t reserverd_offset, int64_t size, const int64_t* row_ids, const Timestamp* timestamps) override;

    // query contains metadata of
 private:
    // NOTE: now deprecated, remains for further copy out
    Status
    QueryDeprecated(query::QueryDeprecatedPtr query_info, Timestamp timestamp, QueryResult& results) override;
    QueryDeprecated(query::QueryDeprecatedPtr query_info, Timestamp timestamp, QueryResult& results);

 public:
    Status
    Search(const query::Plan* Plan,
           const query::PlaceholderGroup* placeholder_groups[],


@@ -204,137 +204,6 @@ SegmentSmallIndex::Delete(int64_t reserved_begin,
    // return Status::OK();
}

template <typename RecordType>
int64_t
get_barrier(const RecordType& record, Timestamp timestamp) {
    auto& vec = record.timestamps_;
    int64_t beg = 0;
    int64_t end = record.ack_responder_.GetAck();
    while (beg < end) {
        auto mid = (beg + end) / 2;
        if (vec[mid] < timestamp) {
            beg = mid + 1;
        } else {
            end = mid;
        }
    }
    return beg;
}

Status
SegmentSmallIndex::QueryBruteForceImpl(const query::QueryInfo& info,
                                       const float* query_data,
                                       int64_t num_queries,
                                       Timestamp timestamp,
                                       QueryResult& results) {
    // step 1: binary search to find the barrier of the snapshot
    auto ins_barrier = get_barrier(record_, timestamp);
    // auto del_barrier = get_barrier(deleted_record_, timestamp);
#if 0
    auto bitmap_holder = get_deleted_bitmap(del_barrier, timestamp, ins_barrier);
    Assert(bitmap_holder);
    auto bitmap = bitmap_holder->bitmap_ptr;
#endif
    // step 2.1: get meta
    // step 2.2: get which vector field to search
    auto vecfield_offset_opt = schema_->get_offset(info.field_id_);
    Assert(vecfield_offset_opt.has_value());
    auto vecfield_offset = vecfield_offset_opt.value();
    Assert(vecfield_offset < record_.entity_vec_.size());
    auto& field = schema_->operator[](vecfield_offset);
    auto vec_ptr = std::static_pointer_cast<ConcurrentVector<float>>(record_.entity_vec_.at(vecfield_offset));
    Assert(field.get_data_type() == DataType::VECTOR_FLOAT);
    auto dim = field.get_dim();
    auto topK = info.topK_;
    auto total_count = topK * num_queries;
    // TODO: optimize
    // step 3: small indexing search
    std::vector<int64_t> final_uids(total_count, -1);
    std::vector<float> final_dis(total_count, std::numeric_limits<float>::max());
    auto max_chunk = (ins_barrier + DefaultElementPerChunk - 1) / DefaultElementPerChunk;
    auto max_indexed_id = indexing_record_.get_finished_ack();
    const auto& indexing_entry = indexing_record_.get_indexing(vecfield_offset);
    auto search_conf = indexing_entry.get_search_conf(topK);
    for (int chunk_id = 0; chunk_id < max_indexed_id; ++chunk_id) {
        auto indexing = indexing_entry.get_indexing(chunk_id);
        auto src_data = vec_ptr->get_chunk(chunk_id).data();
        auto dataset = knowhere::GenDataset(num_queries, dim, src_data);
        auto ans = indexing->Query(dataset, search_conf, nullptr);
        auto dis = ans->Get<float*>(milvus::knowhere::meta::DISTANCE);
        auto uids = ans->Get<int64_t*>(milvus::knowhere::meta::IDS);
        merge_into(num_queries, topK, final_dis.data(), final_uids.data(), dis, uids);
    }
    // step 4: brute force search where small indexing is unavailable
    for (int chunk_id = max_indexed_id; chunk_id < max_chunk; ++chunk_id) {
        std::vector<int64_t> buf_uids(total_count, -1);
        std::vector<float> buf_dis(total_count, std::numeric_limits<float>::max());
        faiss::float_maxheap_array_t buf = {(size_t)num_queries, (size_t)topK, buf_uids.data(), buf_dis.data()};
        auto src_data = vec_ptr->get_chunk(chunk_id).data();
        auto nsize =
            chunk_id != max_chunk - 1 ? DefaultElementPerChunk : ins_barrier - chunk_id * DefaultElementPerChunk;
        faiss::knn_L2sqr(query_data, src_data, dim, num_queries, nsize, &buf);
        merge_into(num_queries, topK, final_dis.data(), final_uids.data(), buf_dis.data(), buf_uids.data());
    }
    // step 5: convert offset to uids
    for (auto& id : final_uids) {
        if (id == -1) {
            continue;
        }
        id = record_.uids_[id];
    }
    results.result_ids_ = std::move(final_uids);
    results.result_distances_ = std::move(final_dis);
    results.topK_ = topK;
    results.num_queries_ = num_queries;
    // throw std::runtime_error("unimplemented");
    return Status::OK();
}

Status
SegmentSmallIndex::QueryDeprecated(query::QueryDeprecatedPtr query_info, Timestamp timestamp, QueryResult& result) {
    // TODO: enable delete
    // TODO: enable index
    // TODO: remove mock
    if (query_info == nullptr) {
        query_info = std::make_shared<query::QueryDeprecated>();
        query_info->field_name = "fakevec";
        query_info->topK = 10;
        query_info->num_queries = 1;
        auto dim = schema_->operator[]("fakevec").get_dim();
        std::default_random_engine e(42);
        std::uniform_real_distribution<> dis(0.0, 1.0);
        query_info->query_raw_data.resize(query_info->num_queries * dim);
        for (auto& x : query_info->query_raw_data) {
            x = dis(e);
        }
    }
    // TODO
    query::QueryInfo info{
        query_info->topK,
        query_info->field_name,
        "L2",
        nlohmann::json{
            {"nprobe", 10},
        },
    };
    auto num_queries = query_info->num_queries;
    return QueryBruteForceImpl(info, query_info->query_raw_data.data(), num_queries, timestamp, result);
}

Status
SegmentSmallIndex::Close() {
    if (this->record_.reserved != this->record_.ack_responder_.GetAck()) {


@@ -65,10 +65,6 @@ class SegmentSmallIndex : public SegmentBase {
    Status
    Delete(int64_t reserverd_offset, int64_t size, const int64_t* row_ids, const Timestamp* timestamps) override;

    // query contains metadata of
    Status
    QueryDeprecated(query::QueryDeprecatedPtr query_info, Timestamp timestamp, QueryResult& results) override;

    Status
    Search(const query::Plan* Plan,
           const query::PlaceholderGroup* placeholder_groups[],
@@ -112,6 +108,16 @@ class SegmentSmallIndex : public SegmentBase {
        return record_;
    }

    const IndexingRecord&
    get_indexing_record() const {
        return indexing_record_;
    }

    const DeletedRecord&
    get_deleted_record() const {
        return deleted_record_;
    }

    const Schema&
    get_schema() const {
        return *schema_;
@@ -148,12 +154,12 @@ class SegmentSmallIndex : public SegmentBase {
    //    Status
    //    QueryBruteForceImpl(query::QueryPtr query, Timestamp timestamp, QueryResult& results);
    Status
    QueryBruteForceImpl(const query::QueryInfo& info,
                        const float* query_data,
                        int64_t num_queries,
                        Timestamp timestamp,
                        QueryResult& results);
    //    Status
    //    QueryBruteForceImpl(const query::QueryInfo& info,
    //                        const float* query_data,
    //                        int64_t num_queries,
    //                        Timestamp timestamp,
    //                        QueryResult& results);

    template <typename Type>
    knowhere::IndexPtr
@@ -172,4 +178,5 @@ class SegmentSmallIndex : public SegmentBase {
    //    std::unordered_map<std::string, knowhere::IndexPtr> indexings_;  // index_name => indexing
    tbb::concurrent_unordered_multimap<idx_t, int64_t> uid2offset_;
};
}  // namespace milvus::segcore