mirror of
https://gitee.com/milvus-io/milvus.git
synced 2024-12-06 05:49:24 +08:00
fix conflict
Former-commit-id: 9b741b6360e0704ec875766c7ef3448d0eba4212
This commit is contained in:
commit
975403286e
@ -10,7 +10,14 @@ container('milvus-build-env') {
|
||||
sh "git config --global user.name \"test\""
|
||||
withCredentials([usernamePassword(credentialsId: "${params.JFROG_USER}", usernameVariable: 'USERNAME', passwordVariable: 'PASSWORD')]) {
|
||||
sh "./build.sh -l"
|
||||
sh "export JFROG_ARTFACTORY_URL='${params.JFROG_ARTFACTORY_URL}' && export JFROG_USER_NAME='${USERNAME}' && export JFROG_PASSWORD='${PASSWORD}' && ./build.sh -t ${params.BUILD_TYPE} -j -u -c"
|
||||
sh "rm -rf cmake_build"
|
||||
sh "export JFROG_ARTFACTORY_URL='${params.JFROG_ARTFACTORY_URL}' \
|
||||
&& export JFROG_USER_NAME='${USERNAME}' \
|
||||
&& export JFROG_PASSWORD='${PASSWORD}' \
|
||||
&& export FAISS_URL='http://192.168.1.105:6060/jinhai/faiss/-/archive/branch-0.2.1/faiss-branch-0.2.1.tar.gz' \
|
||||
&& ./build.sh -t ${params.BUILD_TYPE} -j -u -c"
|
||||
|
||||
sh "./coverage.sh -u root -p Fantast1c -t 192.168.1.194"
|
||||
}
|
||||
}
|
||||
} catch (exc) {
|
||||
|
@ -11,6 +11,7 @@ container('milvus-build-env') {
|
||||
withCredentials([usernamePassword(credentialsId: "${params.JFROG_USER}", usernameVariable: 'USERNAME', passwordVariable: 'PASSWORD')]) {
|
||||
sh "./build.sh -l"
|
||||
sh "export JFROG_ARTFACTORY_URL='${params.JFROG_ARTFACTORY_URL}' && export JFROG_USER_NAME='${USERNAME}' && export JFROG_PASSWORD='${PASSWORD}' && ./build.sh -t ${params.BUILD_TYPE} -j"
|
||||
sh "./coverage.sh -u root -p Fantast1c -t 192.168.1.194"
|
||||
}
|
||||
}
|
||||
} catch (exc) {
|
||||
|
@ -11,17 +11,21 @@ Please mark all change in change log and use the ticket from JIRA.
|
||||
- MS-587 - Count returns wrong result after adding vectors and building index immediately
|
||||
- MS-599 - Search wrong result when table created with metric_type: IP
|
||||
- MS-601 - Docker logs error caused by get CPUTemperature error
|
||||
- MS-622 - Delete vectors should be failed if date range is invalid
|
||||
- MS-605 - Server going down during searching vectors
|
||||
- MS-620 - Get table row count displays wrong error code
|
||||
- MS-622 - Delete vectors should be failed if date range is invalid
|
||||
- MS-624 - Search vectors failed if time ranges long enough
|
||||
- MS-637 - Out of memory when load too many tasks
|
||||
- MS-639 - SQ8H index creation failed and server hangs
|
||||
- MS-640 - Cache object size calculated incorrectly
|
||||
- MS-641 - Segmentation fault (signal 11) in PickToLoad
|
||||
- MS-639 - SQ8H index creation failed and server hangs
|
||||
- MS-647 - [monitor] grafana display average cpu-temp
|
||||
- MS-644 - Search crashed with index-type: flat
|
||||
- MS-624 - Search vectors failed if time ranges long enough
|
||||
- MS-647 - grafana display average cpu-temp
|
||||
- MS-652 - IVFSQH quantization double free
|
||||
- MS-650 - SQ8H index create issue
|
||||
- MS-653 - When config check fails, Milvus closes without a message
|
||||
- MS-654 - Describe index timeout when building index
|
||||
- MS-658 - Fix SQ8 Hybrid can't search
|
||||
|
||||
## Improvement
|
||||
- MS-552 - Add and change the easylogging library
|
||||
@ -39,10 +43,11 @@ Please mark all change in change log and use the ticket from JIRA.
|
||||
- MS-608 - Update TODO names
|
||||
- MS-609 - Update task construct function
|
||||
- MS-611 - Add resources validity check in ResourceMgr
|
||||
- MS-619 - Add optimizer class in scheduler
|
||||
- MS-614 - Preload table at startup
|
||||
- MS-619 - Add optimizer class in scheduler
|
||||
- MS-626 - Refactor DataObj to support cache any type data
|
||||
- MS-648 - Improve unittest
|
||||
- MS-655 - Upgrade SPTAG
|
||||
|
||||
## New Feature
|
||||
- MS-627 - Integrate new index: IVFSQHybrid
|
||||
@ -60,8 +65,8 @@ Please mark all change in change log and use the ticket from JIRA.
|
||||
- MS-600 - Reconstruct unittest code
|
||||
- MS-602 - Remove zilliz namespace
|
||||
- MS-610 - Change error code base value from hex to decimal
|
||||
- MS-635 - Add compile option to support customized faiss
|
||||
- MS-624 - Re-organize project directory for open-source
|
||||
- MS-635 - Add compile option to support customized faiss
|
||||
|
||||
# Milvus 0.4.0 (2019-09-12)
|
||||
|
||||
|
@ -125,10 +125,6 @@ set(MILVUS_SOURCE_DIR ${PROJECT_SOURCE_DIR})
|
||||
set(MILVUS_BINARY_DIR ${PROJECT_BINARY_DIR})
|
||||
set(MILVUS_ENGINE_SRC ${PROJECT_SOURCE_DIR}/src)
|
||||
|
||||
if (CUSTOMIZATION)
|
||||
add_definitions(-DCUSTOMIZATION)
|
||||
endif (CUSTOMIZATION)
|
||||
|
||||
include(ExternalProject)
|
||||
include(DefineOptions)
|
||||
include(BuildUtils)
|
||||
@ -136,6 +132,10 @@ include(ThirdPartyPackages)
|
||||
|
||||
config_summary()
|
||||
|
||||
if (CUSTOMIZATION)
|
||||
add_definitions(-DCUSTOMIZATION)
|
||||
endif (CUSTOMIZATION)
|
||||
|
||||
add_subdirectory(src)
|
||||
|
||||
if (BUILD_UNIT_TEST STREQUAL "ON")
|
||||
|
@ -76,13 +76,13 @@ $ sudo ln -s /path/to/libmysqlclient.so /path/to/libmysqlclient_r.so
|
||||
###### Step 2 Build
|
||||
|
||||
```shell
|
||||
$ cd [Milvus sourcecode path]/cpp
|
||||
$ cd [Milvus sourcecode path]/core
|
||||
$ ./build.sh -t Debug
|
||||
or
|
||||
$ ./build.sh -t Release
|
||||
```
|
||||
|
||||
When the build is completed, all the stuff that you need in order to run Milvus will be installed under `[Milvus root path]/cpp/milvus`.
|
||||
When the build completes, everything you need to run Milvus is installed under `[Milvus root path]/core/milvus`.
|
||||
|
||||
If you encounter the following error message,
|
||||
`protocol https not supported or disabled in libcurl`
|
||||
@ -148,11 +148,20 @@ $ sudo apt-get install lcov
|
||||
```shell
|
||||
$ ./build.sh -u -c
|
||||
```
|
||||
Run MySQL in Docker
|
||||
```shell
|
||||
docker pull mysql:latest
|
||||
docker run -p 3306:3306 -e MYSQL_ROOT_PASSWORD=123456 -d mysql:latest
|
||||
```
|
||||
Run code coverage
|
||||
```shell
|
||||
$ ./coverage.sh -u root -p 123456 -t 127.0.0.1
|
||||
```
|
||||
|
||||
##### Launch Milvus server
|
||||
|
||||
```shell
|
||||
$ cd [Milvus root path]/cpp/milvus
|
||||
$ cd [Milvus root path]/core/milvus
|
||||
```
|
||||
|
||||
Add `lib/` directory to `LD_LIBRARY_PATH`
|
||||
@ -202,7 +211,7 @@ $ python3 example.py
|
||||
|
||||
```shell
|
||||
# Run Milvus C++ example
|
||||
$ cd [Milvus root path]/cpp/milvus/bin
|
||||
$ cd [Milvus root path]/core/milvus/bin
|
||||
$ ./sdk_simple
|
||||
```
|
||||
|
||||
|
@ -9,10 +9,16 @@ DB_PATH="/opt/milvus"
|
||||
PROFILING="OFF"
|
||||
USE_JFROG_CACHE="OFF"
|
||||
RUN_CPPLINT="OFF"
|
||||
CUSTOMIZATION="ON"
|
||||
CUSTOMIZATION="OFF" # default use ori faiss
|
||||
CUDA_COMPILER=/usr/local/cuda/bin/nvcc
|
||||
|
||||
wget -q --method HEAD
|
||||
CUSTOMIZED_FAISS_URL="${FAISS_URL:-NONE}"
|
||||
wget -q --method HEAD ${CUSTOMIZED_FAISS_URL}
|
||||
if [ $? -eq 0 ]; then
|
||||
CUSTOMIZATION="ON"
|
||||
else
|
||||
CUSTOMIZATION="OFF"
|
||||
fi
|
||||
|
||||
while getopts "p:d:t:ulrcgjhx" arg
|
||||
do
|
||||
@ -49,7 +55,7 @@ do
|
||||
USE_JFROG_CACHE="ON"
|
||||
;;
|
||||
x)
|
||||
CUSTOMIZATION="OFF"
|
||||
CUSTOMIZATION="OFF" # force use ori faiss
|
||||
;;
|
||||
h) # help
|
||||
echo "
|
||||
@ -94,6 +100,7 @@ CMAKE_CMD="cmake \
|
||||
-DMILVUS_ENABLE_PROFILING=${PROFILING} \
|
||||
-DUSE_JFROG_CACHE=${USE_JFROG_CACHE} \
|
||||
-DCUSTOMIZATION=${CUSTOMIZATION} \
|
||||
-DFAISS_URL=${CUSTOMIZED_FAISS_URL} \
|
||||
../"
|
||||
echo ${CMAKE_CMD}
|
||||
${CMAKE_CMD}
|
||||
@ -136,11 +143,4 @@ else
|
||||
fi
|
||||
|
||||
make install || exit 1
|
||||
|
||||
# evaluate code coverage
|
||||
if [[ ${BUILD_COVERAGE} == "ON" ]]; then
|
||||
cd -
|
||||
bash `pwd`/coverage.sh
|
||||
cd -
|
||||
fi
|
||||
fi
|
@ -88,6 +88,11 @@ function(ExternalProject_Create_Cache project_name package_file install_path cac
|
||||
file(REMOVE ${package_file})
|
||||
endif()
|
||||
|
||||
string(REGEX REPLACE "(.+)/.+$" "\\1" package_dir ${package_file})
|
||||
if(NOT EXISTS ${package_dir})
|
||||
file(MAKE_DIRECTORY ${package_dir})
|
||||
endif()
|
||||
|
||||
message(STATUS "Will create cached package file: ${package_file}")
|
||||
|
||||
ExternalProject_Add_Step(${project_name} package
|
||||
|
@ -2,6 +2,44 @@
|
||||
|
||||
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$(pwd)/milvus/lib
|
||||
|
||||
MYSQL_USER_NAME=root
|
||||
MYSQL_PASSWORD=123456
|
||||
MYSQL_HOST='127.0.0.1'
|
||||
MYSQL_PORT='3306'
|
||||
|
||||
while getopts "u:p:t:h" arg
|
||||
do
|
||||
case $arg in
|
||||
u)
|
||||
MYSQL_USER_NAME=$OPTARG
|
||||
;;
|
||||
p)
|
||||
MYSQL_PASSWORD=$OPTARG
|
||||
;;
|
||||
t)
|
||||
MYSQL_HOST=$OPTARG
|
||||
;;
|
||||
h) # help
|
||||
echo "
|
||||
|
||||
parameter:
|
||||
-u: mysql account
|
||||
-p: mysql password
|
||||
-t: mysql host
|
||||
-h: help
|
||||
|
||||
usage:
|
||||
./coverage.sh -u \${MYSQL_USER} -p \${MYSQL_PASSWORD} -t \${MYSQL_HOST} [-h]
|
||||
"
|
||||
exit 0
|
||||
;;
|
||||
?)
|
||||
echo "ERROR! unknown argument"
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
LCOV_CMD="lcov"
|
||||
LCOV_GEN_CMD="genhtml"
|
||||
|
||||
@ -15,34 +53,12 @@ DIR_GCNO="cmake_build"
|
||||
DIR_UNITTEST="milvus/unittest"
|
||||
|
||||
# delete old code coverage info files
|
||||
rm -f FILE_INFO_BASE
|
||||
rm -f FILE_INFO_MILVUS
|
||||
rm -f FILE_INFO_OUTPUT
|
||||
rm -f FILE_INFO_OUTPUT_NEW
|
||||
rm -rf lcov_out
|
||||
rm -f FILE_INFO_BASE FILE_INFO_MILVUS FILE_INFO_OUTPUT FILE_INFO_OUTPUT_NEW
|
||||
|
||||
MYSQL_USER_NAME=root
|
||||
MYSQL_PASSWORD=Fantast1c
|
||||
MYSQL_HOST='192.168.1.194'
|
||||
MYSQL_PORT='3306'
|
||||
|
||||
MYSQL_DB_NAME=milvus_`date +%s%N`
|
||||
|
||||
function mysql_exc()
|
||||
{
|
||||
cmd=$1
|
||||
mysql -h${MYSQL_HOST} -u${MYSQL_USER_NAME} -p${MYSQL_PASSWORD} -e "${cmd}"
|
||||
if [ $? -ne 0 ]; then
|
||||
echo "mysql $cmd run failed"
|
||||
fi
|
||||
}
|
||||
|
||||
mysql_exc "CREATE DATABASE IF NOT EXISTS ${MYSQL_DB_NAME};"
|
||||
mysql_exc "GRANT ALL PRIVILEGES ON ${MYSQL_DB_NAME}.* TO '${MYSQL_USER_NAME}'@'%';"
|
||||
mysql_exc "FLUSH PRIVILEGES;"
|
||||
mysql_exc "USE ${MYSQL_DB_NAME};"
|
||||
|
||||
MYSQL_USER_NAME=root
|
||||
MYSQL_PASSWORD=Fantast1c
|
||||
MYSQL_HOST='192.168.1.194'
|
||||
MYSQL_PORT='3306'
|
||||
|
||||
MYSQL_DB_NAME=milvus_`date +%s%N`
|
||||
|
||||
|
@ -26,6 +26,7 @@ include_directories(${MILVUS_ENGINE_SRC}/grpc/gen-milvus)
|
||||
|
||||
# This statement must be placed here, since CORE_INCLUDE_DIRS is defined in code/CMakeLists.txt
|
||||
add_subdirectory(index)
|
||||
|
||||
set(CORE_INCLUDE_DIRS ${CORE_INCLUDE_DIRS} PARENT_SCOPE)
|
||||
foreach (dir ${CORE_INCLUDE_DIRS})
|
||||
include_directories(${dir})
|
||||
@ -182,8 +183,6 @@ target_link_libraries(milvus_server
|
||||
install(TARGETS milvus_server DESTINATION bin)
|
||||
|
||||
install(FILES
|
||||
${CMAKE_SOURCE_DIR}/src/index/thirdparty/tbb/${CMAKE_SHARED_LIBRARY_PREFIX}tbb${CMAKE_SHARED_LIBRARY_SUFFIX}
|
||||
${CMAKE_SOURCE_DIR}/src/index/thirdparty/tbb/${CMAKE_SHARED_LIBRARY_PREFIX}tbb${CMAKE_SHARED_LIBRARY_SUFFIX}.2
|
||||
${CMAKE_BINARY_DIR}/mysqlpp_ep-prefix/src/mysqlpp_ep/lib/${CMAKE_SHARED_LIBRARY_PREFIX}mysqlpp${CMAKE_SHARED_LIBRARY_SUFFIX}
|
||||
${CMAKE_BINARY_DIR}/mysqlpp_ep-prefix/src/mysqlpp_ep/lib/${CMAKE_SHARED_LIBRARY_PREFIX}mysqlpp${CMAKE_SHARED_LIBRARY_SUFFIX}.3
|
||||
${CMAKE_BINARY_DIR}/mysqlpp_ep-prefix/src/mysqlpp_ep/lib/${CMAKE_SHARED_LIBRARY_PREFIX}mysqlpp${CMAKE_SHARED_LIBRARY_SUFFIX}.3.2.4
|
||||
|
@ -1,31 +0,0 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#include "config/ConfigMgr.h"
|
||||
#include "YamlConfigMgr.h"
|
||||
|
||||
namespace milvus {
|
||||
namespace server {
|
||||
|
||||
ConfigMgr*
|
||||
ConfigMgr::GetInstance() {
|
||||
static YamlConfigMgr mgr;
|
||||
return &mgr;
|
||||
}
|
||||
|
||||
} // namespace server
|
||||
} // namespace milvus
|
@ -17,42 +17,28 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "ConfigNode.h"
|
||||
#include "utils/Error.h"
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "ConfigNode.h"
|
||||
#include "utils/Status.h"
|
||||
|
||||
namespace milvus {
|
||||
namespace server {
|
||||
|
||||
// this class can parse nested config file and return config item
|
||||
// config file example(yaml style)
|
||||
// AAA: 1
|
||||
// BBB:
|
||||
// CCC: hello
|
||||
// DDD: 23.5
|
||||
//
|
||||
// usage
|
||||
// const ConfigMgr* mgr = ConfigMgr::GetInstance();
|
||||
// const ConfigNode& node = mgr->GetRootNode();
|
||||
// std::string val = node.GetValue("AAA"); // return '1'
|
||||
// const ConfigNode& child = node.GetChild("BBB");
|
||||
// val = child.GetValue("CCC"); //return 'hello'
|
||||
|
||||
class ConfigMgr {
|
||||
public:
|
||||
static ConfigMgr*
|
||||
GetInstance();
|
||||
|
||||
virtual ErrorCode
|
||||
virtual Status
|
||||
LoadConfigFile(const std::string& filename) = 0;
|
||||
|
||||
virtual void
|
||||
Print() const = 0; // will be deleted
|
||||
|
||||
virtual std::string
|
||||
DumpString() const = 0;
|
||||
|
||||
virtual const ConfigNode&
|
||||
GetRootNode() const = 0;
|
||||
|
||||
virtual ConfigNode&
|
||||
GetRootNode() = 0;
|
||||
};
|
||||
|
@ -18,29 +18,20 @@
|
||||
#include "config/YamlConfigMgr.h"
|
||||
#include "utils/Log.h"
|
||||
|
||||
#include <sys/stat.h>
|
||||
|
||||
namespace milvus {
|
||||
namespace server {
|
||||
|
||||
ErrorCode
|
||||
Status
|
||||
YamlConfigMgr::LoadConfigFile(const std::string& filename) {
|
||||
struct stat directoryStat;
|
||||
int statOK = stat(filename.c_str(), &directoryStat);
|
||||
if (statOK != 0) {
|
||||
SERVER_LOG_ERROR << "File not found: " << filename;
|
||||
return SERVER_UNEXPECTED_ERROR;
|
||||
}
|
||||
|
||||
try {
|
||||
node_ = YAML::LoadFile(filename);
|
||||
LoadConfigNode(node_, config_);
|
||||
} catch (YAML::Exception& e) {
|
||||
SERVER_LOG_ERROR << "Failed to load config file: " << std::string(e.what());
|
||||
return SERVER_UNEXPECTED_ERROR;
|
||||
std::string str = "Exception: load config file fail: " + std::string(e.what());
|
||||
return Status(SERVER_UNEXPECTED_ERROR, str);
|
||||
}
|
||||
|
||||
return SERVER_SUCCESS;
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
void
|
||||
|
@ -17,27 +17,35 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "ConfigMgr.h"
|
||||
#include "ConfigNode.h"
|
||||
#include "utils/Error.h"
|
||||
|
||||
#include <yaml-cpp/yaml.h>
|
||||
#include <string>
|
||||
|
||||
#include "ConfigMgr.h"
|
||||
#include "utils/Status.h"
|
||||
|
||||
namespace milvus {
|
||||
namespace server {
|
||||
|
||||
class YamlConfigMgr : public ConfigMgr {
|
||||
public:
|
||||
virtual ErrorCode
|
||||
static ConfigMgr*
|
||||
GetInstance() {
|
||||
static YamlConfigMgr mgr;
|
||||
return &mgr;
|
||||
}
|
||||
|
||||
virtual Status
|
||||
LoadConfigFile(const std::string& filename);
|
||||
|
||||
virtual void
|
||||
Print() const;
|
||||
|
||||
virtual std::string
|
||||
DumpString() const;
|
||||
|
||||
virtual const ConfigNode&
|
||||
GetRootNode() const;
|
||||
|
||||
virtual ConfigNode&
|
||||
GetRootNode();
|
||||
|
||||
|
@ -251,11 +251,6 @@ DBImpl::InsertVectors(const std::string& table_id, uint64_t n, const float* vect
|
||||
Status status;
|
||||
milvus::server::CollectInsertMetrics metrics(n, status);
|
||||
status = mem_mgr_->InsertVectors(table_id, n, vectors, vector_ids);
|
||||
// std::chrono::microseconds time_span =
|
||||
// std::chrono::duration_cast<std::chrono::microseconds>(end_time - start_time);
|
||||
// double average_time = double(time_span.count()) / n;
|
||||
|
||||
// ENGINE_LOG_DEBUG << "Insert vectors to cache finished";
|
||||
|
||||
return status;
|
||||
}
|
||||
@ -359,7 +354,7 @@ DBImpl::Query(const std::string& table_id, uint64_t k, uint64_t nq, uint64_t npr
|
||||
return Status(DB_ERROR, "Milsvus server is shutdown!");
|
||||
}
|
||||
|
||||
ENGINE_LOG_DEBUG << "Query by dates for table: " << table_id;
|
||||
ENGINE_LOG_DEBUG << "Query by dates for table: " << table_id << " date range count: " << dates.size();
|
||||
|
||||
// get all table files from table
|
||||
meta::DatePartionedTableFilesSchema files;
|
||||
@ -377,7 +372,7 @@ DBImpl::Query(const std::string& table_id, uint64_t k, uint64_t nq, uint64_t npr
|
||||
}
|
||||
|
||||
cache::CpuCacheMgr::GetInstance()->PrintInfo(); // print cache info before query
|
||||
status = QueryAsync(table_id, file_id_array, k, nq, nprobe, vectors, dates, results);
|
||||
status = QueryAsync(table_id, file_id_array, k, nq, nprobe, vectors, results);
|
||||
cache::CpuCacheMgr::GetInstance()->PrintInfo(); // print cache info after query
|
||||
return status;
|
||||
}
|
||||
@ -389,7 +384,7 @@ DBImpl::Query(const std::string& table_id, const std::vector<std::string>& file_
|
||||
return Status(DB_ERROR, "Milsvus server is shutdown!");
|
||||
}
|
||||
|
||||
ENGINE_LOG_DEBUG << "Query by file ids for table: " << table_id;
|
||||
ENGINE_LOG_DEBUG << "Query by file ids for table: " << table_id << " date range count: " << dates.size();
|
||||
|
||||
// get specified files
|
||||
std::vector<size_t> ids;
|
||||
@ -418,7 +413,7 @@ DBImpl::Query(const std::string& table_id, const std::vector<std::string>& file_
|
||||
}
|
||||
|
||||
cache::CpuCacheMgr::GetInstance()->PrintInfo(); // print cache info before query
|
||||
status = QueryAsync(table_id, file_id_array, k, nq, nprobe, vectors, dates, results);
|
||||
status = QueryAsync(table_id, file_id_array, k, nq, nprobe, vectors, results);
|
||||
cache::CpuCacheMgr::GetInstance()->PrintInfo(); // print cache info after query
|
||||
return status;
|
||||
}
|
||||
@ -437,14 +432,13 @@ DBImpl::Size(uint64_t& result) {
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
Status
|
||||
DBImpl::QueryAsync(const std::string& table_id, const meta::TableFilesSchema& files, uint64_t k, uint64_t nq,
|
||||
uint64_t nprobe, const float* vectors, const meta::DatesT& dates, QueryResults& results) {
|
||||
uint64_t nprobe, const float* vectors, QueryResults& results) {
|
||||
server::CollectQueryMetrics metrics(nq);
|
||||
|
||||
TimeRecorder rc("");
|
||||
|
||||
// step 1: get files to search
|
||||
ENGINE_LOG_DEBUG << "Engine query begin, index file count: " << files.size()
|
||||
<< " date range count: " << dates.size();
|
||||
ENGINE_LOG_DEBUG << "Engine query begin, index file count: " << files.size();
|
||||
scheduler::SearchJobPtr job = std::make_shared<scheduler::SearchJob>(0, k, nq, nprobe, vectors);
|
||||
for (auto& file : files) {
|
||||
scheduler::TableFileSchemaPtr file_ptr = std::make_shared<meta::TableFileSchema>(file);
|
||||
@ -458,32 +452,7 @@ DBImpl::QueryAsync(const std::string& table_id, const meta::TableFilesSchema& fi
|
||||
return job->GetStatus();
|
||||
}
|
||||
|
||||
// step 3: print time cost information
|
||||
// double load_cost = context->LoadCost();
|
||||
// double search_cost = context->SearchCost();
|
||||
// double reduce_cost = context->ReduceCost();
|
||||
// std::string load_info = TimeRecorder::GetTimeSpanStr(load_cost);
|
||||
// std::string search_info = TimeRecorder::GetTimeSpanStr(search_cost);
|
||||
// std::string reduce_info = TimeRecorder::GetTimeSpanStr(reduce_cost);
|
||||
// if(search_cost > 0.0 || reduce_cost > 0.0) {
|
||||
// double total_cost = load_cost + search_cost + reduce_cost;
|
||||
// double load_percent = load_cost/total_cost;
|
||||
// double search_percent = search_cost/total_cost;
|
||||
// double reduce_percent = reduce_cost/total_cost;
|
||||
//
|
||||
// ENGINE_LOG_DEBUG << "Engine load index totally cost: " << load_info
|
||||
// << " percent: " << load_percent*100 << "%";
|
||||
// ENGINE_LOG_DEBUG << "Engine search index totally cost: " << search_info
|
||||
// << " percent: " << search_percent*100 << "%";
|
||||
// ENGINE_LOG_DEBUG << "Engine reduce topk totally cost: " << reduce_info
|
||||
// << " percent: " << reduce_percent*100 << "%";
|
||||
// } else {
|
||||
// ENGINE_LOG_DEBUG << "Engine load cost: " << load_info
|
||||
// << " search cost: " << search_info
|
||||
// << " reduce cost: " << reduce_info;
|
||||
// }
|
||||
|
||||
// step 4: construct results
|
||||
// step 3: construct results
|
||||
results = job->GetResult();
|
||||
rc.ElapseFromBegin("Engine query totally cost");
|
||||
|
||||
@ -540,7 +509,13 @@ DBImpl::StartMetricTask() {
|
||||
server::Metrics::GetInstance().KeepingAliveCounterIncrement(METRIC_ACTION_INTERVAL);
|
||||
int64_t cache_usage = cache::CpuCacheMgr::GetInstance()->CacheUsage();
|
||||
int64_t cache_total = cache::CpuCacheMgr::GetInstance()->CacheCapacity();
|
||||
server::Metrics::GetInstance().CpuCacheUsageGaugeSet(cache_usage * 100 / cache_total);
|
||||
if (cache_total > 0) {
|
||||
double cache_usage_double = cache_usage;
|
||||
server::Metrics::GetInstance().CpuCacheUsageGaugeSet(cache_usage_double * 100 / cache_total);
|
||||
} else {
|
||||
server::Metrics::GetInstance().CpuCacheUsageGaugeSet(0);
|
||||
}
|
||||
|
||||
server::Metrics::GetInstance().GpuCacheUsageGaugeSet();
|
||||
uint64_t size;
|
||||
Size(size);
|
||||
@ -695,14 +670,13 @@ DBImpl::BackgroundMergeFiles(const std::string& table_id) {
|
||||
return status;
|
||||
}
|
||||
|
||||
bool has_merge = false;
|
||||
for (auto& kv : raw_files) {
|
||||
auto files = kv.second;
|
||||
if (files.size() < options_.merge_trigger_number_) {
|
||||
ENGINE_LOG_DEBUG << "Files number not greater equal than merge trigger number, skip merge action";
|
||||
continue;
|
||||
}
|
||||
has_merge = true;
|
||||
|
||||
MergeFiles(table_id, kv.first, kv.second);
|
||||
|
||||
if (shutting_down_.load(std::memory_order_acquire)) {
|
||||
@ -770,127 +744,6 @@ DBImpl::StartBuildIndexTask(bool force) {
|
||||
}
|
||||
}
|
||||
|
||||
Status
|
||||
DBImpl::BuildIndex(const meta::TableFileSchema& file) {
|
||||
ExecutionEnginePtr to_index = EngineFactory::Build(file.dimension_, file.location_, (EngineType)file.engine_type_,
|
||||
(MetricType)file.metric_type_, file.nlist_);
|
||||
if (to_index == nullptr) {
|
||||
ENGINE_LOG_ERROR << "Invalid engine type";
|
||||
return Status(DB_ERROR, "Invalid engine type");
|
||||
}
|
||||
|
||||
try {
|
||||
// step 1: load index
|
||||
Status status = to_index->Load(options_.insert_cache_immediately_);
|
||||
if (!status.ok()) {
|
||||
ENGINE_LOG_ERROR << "Failed to load index file: " << status.ToString();
|
||||
return status;
|
||||
}
|
||||
|
||||
// step 2: create table file
|
||||
meta::TableFileSchema table_file;
|
||||
table_file.table_id_ = file.table_id_;
|
||||
table_file.date_ = file.date_;
|
||||
table_file.file_type_ =
|
||||
meta::TableFileSchema::NEW_INDEX; // for multi-db-path, distribute index file averagely to each path
|
||||
status = meta_ptr_->CreateTableFile(table_file);
|
||||
if (!status.ok()) {
|
||||
ENGINE_LOG_ERROR << "Failed to create table file: " << status.ToString();
|
||||
return status;
|
||||
}
|
||||
|
||||
// step 3: build index
|
||||
std::shared_ptr<ExecutionEngine> index;
|
||||
|
||||
try {
|
||||
server::CollectBuildIndexMetrics metrics;
|
||||
index = to_index->BuildIndex(table_file.location_, (EngineType)table_file.engine_type_);
|
||||
if (index == nullptr) {
|
||||
table_file.file_type_ = meta::TableFileSchema::TO_DELETE;
|
||||
status = meta_ptr_->UpdateTableFile(table_file);
|
||||
ENGINE_LOG_DEBUG << "Failed to update file to index, mark file: " << table_file.file_id_
|
||||
<< " to to_delete";
|
||||
|
||||
return status;
|
||||
}
|
||||
} catch (std::exception& ex) {
|
||||
// typical error: out of gpu memory
|
||||
std::string msg = "BuildIndex encounter exception: " + std::string(ex.what());
|
||||
ENGINE_LOG_ERROR << msg;
|
||||
|
||||
table_file.file_type_ = meta::TableFileSchema::TO_DELETE;
|
||||
status = meta_ptr_->UpdateTableFile(table_file);
|
||||
ENGINE_LOG_DEBUG << "Failed to update file to index, mark file: " << table_file.file_id_ << " to to_delete";
|
||||
|
||||
std::cout << "ERROR: failed to build index, index file is too large or gpu memory is not enough"
|
||||
<< std::endl;
|
||||
|
||||
return Status(DB_ERROR, msg);
|
||||
}
|
||||
|
||||
// step 4: if table has been deleted, dont save index file
|
||||
bool has_table = false;
|
||||
meta_ptr_->HasTable(file.table_id_, has_table);
|
||||
if (!has_table) {
|
||||
meta_ptr_->DeleteTableFiles(file.table_id_);
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
// step 5: save index file
|
||||
try {
|
||||
index->Serialize();
|
||||
} catch (std::exception& ex) {
|
||||
// typical error: out of disk space or permission denied
|
||||
std::string msg = "Serialize index encounter exception: " + std::string(ex.what());
|
||||
ENGINE_LOG_ERROR << msg;
|
||||
|
||||
table_file.file_type_ = meta::TableFileSchema::TO_DELETE;
|
||||
status = meta_ptr_->UpdateTableFile(table_file);
|
||||
ENGINE_LOG_DEBUG << "Failed to update file to index, mark file: " << table_file.file_id_ << " to to_delete";
|
||||
|
||||
std::cout << "ERROR: failed to persist index file: " << table_file.location_
|
||||
<< ", possible out of disk space" << std::endl;
|
||||
|
||||
return Status(DB_ERROR, msg);
|
||||
}
|
||||
|
||||
// step 6: update meta
|
||||
table_file.file_type_ = meta::TableFileSchema::INDEX;
|
||||
table_file.file_size_ = index->PhysicalSize();
|
||||
table_file.row_count_ = index->Count();
|
||||
|
||||
auto origin_file = file;
|
||||
origin_file.file_type_ = meta::TableFileSchema::BACKUP;
|
||||
|
||||
meta::TableFilesSchema update_files = {table_file, origin_file};
|
||||
status = meta_ptr_->UpdateTableFiles(update_files);
|
||||
if (status.ok()) {
|
||||
ENGINE_LOG_DEBUG << "New index file " << table_file.file_id_ << " of size " << index->PhysicalSize()
|
||||
<< " bytes"
|
||||
<< " from file " << origin_file.file_id_;
|
||||
|
||||
if (options_.insert_cache_immediately_) {
|
||||
index->Cache();
|
||||
}
|
||||
} else {
|
||||
// failed to update meta, mark the new file as to_delete, don't delete old file
|
||||
origin_file.file_type_ = meta::TableFileSchema::TO_INDEX;
|
||||
status = meta_ptr_->UpdateTableFile(origin_file);
|
||||
ENGINE_LOG_DEBUG << "Failed to update file to index, mark file: " << origin_file.file_id_ << " to to_index";
|
||||
|
||||
table_file.file_type_ = meta::TableFileSchema::TO_DELETE;
|
||||
status = meta_ptr_->UpdateTableFile(table_file);
|
||||
ENGINE_LOG_DEBUG << "Failed to update file to index, mark file: " << table_file.file_id_ << " to to_delete";
|
||||
}
|
||||
} catch (std::exception& ex) {
|
||||
std::string msg = "Build index encounter exception: " + std::string(ex.what());
|
||||
ENGINE_LOG_ERROR << msg;
|
||||
return Status(DB_ERROR, msg);
|
||||
}
|
||||
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
void
|
||||
DBImpl::BackgroundBuildIndex() {
|
||||
ENGINE_LOG_TRACE << "Background build index thread start";
|
||||
@ -915,17 +768,6 @@ DBImpl::BackgroundBuildIndex() {
|
||||
ENGINE_LOG_ERROR << "Building index failed: " << status.ToString();
|
||||
}
|
||||
}
|
||||
// for (auto &file : to_index_files) {
|
||||
// status = BuildIndex(file);
|
||||
// if (!status.ok()) {
|
||||
// ENGINE_LOG_ERROR << "Building index for " << file.id_ << " failed: " << status.ToString();
|
||||
// }
|
||||
//
|
||||
// if (shutting_down_.load(std::memory_order_acquire)) {
|
||||
// ENGINE_LOG_DEBUG << "Server will shutdown, skip build index action";
|
||||
// break;
|
||||
// }
|
||||
// }
|
||||
|
||||
ENGINE_LOG_TRACE << "Background build index thread exit";
|
||||
}
|
||||
|
@ -107,7 +107,7 @@ class DBImpl : public DB {
|
||||
private:
|
||||
Status
|
||||
QueryAsync(const std::string& table_id, const meta::TableFilesSchema& files, uint64_t k, uint64_t nq,
|
||||
uint64_t nprobe, const float* vectors, const meta::DatesT& dates, QueryResults& results);
|
||||
uint64_t nprobe, const float* vectors, QueryResults& results);
|
||||
|
||||
void
|
||||
BackgroundTimerTask();
|
||||
@ -133,9 +133,6 @@ class DBImpl : public DB {
|
||||
void
|
||||
BackgroundBuildIndex();
|
||||
|
||||
Status
|
||||
BuildIndex(const meta::TableFileSchema&);
|
||||
|
||||
Status
|
||||
MemSerialize();
|
||||
|
||||
|
@ -80,8 +80,7 @@ class ExecutionEngine {
|
||||
Merge(const std::string& location) = 0;
|
||||
|
||||
virtual Status
|
||||
Search(int64_t n, const float* data, int64_t k, int64_t nprobe, float* distances, int64_t* labels,
|
||||
bool hybrid) const = 0;
|
||||
Search(int64_t n, const float* data, int64_t k, int64_t nprobe, float* distances, int64_t* labels, bool hybrid) = 0;
|
||||
|
||||
virtual std::shared_ptr<ExecutionEngine>
|
||||
BuildIndex(const std::string& location, EngineType engine_type) = 0;
|
||||
|
@ -37,6 +37,7 @@
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
//#define ON_SEARCH
|
||||
namespace milvus {
|
||||
namespace engine {
|
||||
|
||||
@ -248,26 +249,6 @@ ExecutionEngineImpl::Load(bool to_cache) {
|
||||
Status
|
||||
ExecutionEngineImpl::CopyToGpu(uint64_t device_id, bool hybrid) {
|
||||
if (hybrid) {
|
||||
auto key = location_ + ".quantizer";
|
||||
auto quantizer =
|
||||
std::static_pointer_cast<CachedQuantizer>(cache::GpuCacheMgr::GetInstance(device_id)->GetIndex(key));
|
||||
|
||||
auto conf = std::make_shared<knowhere::QuantizerCfg>();
|
||||
conf->gpu_id = device_id;
|
||||
|
||||
if (quantizer) {
|
||||
// cache hit
|
||||
conf->mode = 2;
|
||||
auto new_index = index_->LoadData(quantizer->Data(), conf);
|
||||
index_ = new_index;
|
||||
} else {
|
||||
auto pair = index_->CopyToGpuWithQuantizer(device_id);
|
||||
index_ = pair.first;
|
||||
|
||||
// cache
|
||||
auto cached_quantizer = std::make_shared<CachedQuantizer>(pair.second);
|
||||
cache::GpuCacheMgr::GetInstance(device_id)->InsertItem(key, cached_quantizer);
|
||||
}
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
@ -415,7 +396,60 @@ ExecutionEngineImpl::BuildIndex(const std::string& location, EngineType engine_t
|
||||
|
||||
Status
|
||||
ExecutionEngineImpl::Search(int64_t n, const float* data, int64_t k, int64_t nprobe, float* distances, int64_t* labels,
|
||||
bool hybrid) const {
|
||||
bool hybrid) {
|
||||
if (index_type_ == EngineType::FAISS_IVFSQ8H) {
|
||||
if (!hybrid) {
|
||||
const std::string key = location_ + ".quantizer";
|
||||
std::vector<uint64_t> gpus = scheduler::get_gpu_pool();
|
||||
|
||||
const int64_t NOT_FOUND = -1;
|
||||
int64_t device_id = NOT_FOUND;
|
||||
|
||||
// cache hit
|
||||
{
|
||||
knowhere::QuantizerPtr quantizer = nullptr;
|
||||
|
||||
for (auto& gpu : gpus) {
|
||||
auto cache = cache::GpuCacheMgr::GetInstance(gpu);
|
||||
if (auto cached_quantizer = cache->GetIndex(key)) {
|
||||
device_id = gpu;
|
||||
quantizer = std::static_pointer_cast<CachedQuantizer>(cached_quantizer)->Data();
|
||||
}
|
||||
}
|
||||
|
||||
if (device_id != NOT_FOUND) {
|
||||
// cache hit
|
||||
auto config = std::make_shared<knowhere::QuantizerCfg>();
|
||||
config->gpu_id = device_id;
|
||||
config->mode = 2;
|
||||
auto new_index = index_->LoadData(quantizer, config);
|
||||
index_ = new_index;
|
||||
}
|
||||
}
|
||||
|
||||
if (device_id == NOT_FOUND) {
|
||||
// cache miss
|
||||
std::vector<int64_t> all_free_mem;
|
||||
for (auto& gpu : gpus) {
|
||||
auto cache = cache::GpuCacheMgr::GetInstance(gpu);
|
||||
auto free_mem = cache->CacheCapacity() - cache->CacheUsage();
|
||||
all_free_mem.push_back(free_mem);
|
||||
}
|
||||
|
||||
auto max_e = std::max_element(all_free_mem.begin(), all_free_mem.end());
|
||||
auto best_index = std::distance(all_free_mem.begin(), max_e);
|
||||
device_id = gpus[best_index];
|
||||
|
||||
auto pair = index_->CopyToGpuWithQuantizer(device_id);
|
||||
index_ = pair.first;
|
||||
|
||||
// cache
|
||||
auto cached_quantizer = std::make_shared<CachedQuantizer>(pair.second);
|
||||
cache::GpuCacheMgr::GetInstance(device_id)->InsertItem(key, cached_quantizer);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (index_ == nullptr) {
|
||||
ENGINE_LOG_ERROR << "ExecutionEngineImpl: index is null, failed to search";
|
||||
return Status(DB_ERROR, "index is null");
|
||||
|
@ -72,7 +72,7 @@ class ExecutionEngineImpl : public ExecutionEngine {
|
||||
|
||||
Status
|
||||
Search(int64_t n, const float* data, int64_t k, int64_t nprobe, float* distances, int64_t* labels,
|
||||
bool hybrid = false) const override;
|
||||
bool hybrid = false) override;
|
||||
|
||||
ExecutionEnginePtr
|
||||
BuildIndex(const std::string& location, EngineType engine_type) override;
|
||||
|
@ -88,6 +88,11 @@ function(ExternalProject_Create_Cache project_name package_file install_path cac
|
||||
file(REMOVE ${package_file})
|
||||
endif()
|
||||
|
||||
string(REGEX REPLACE "(.+)/.+$" "\\1" package_dir ${package_file})
|
||||
if(NOT EXISTS ${package_dir})
|
||||
file(MAKE_DIRECTORY ${package_dir})
|
||||
endif()
|
||||
|
||||
message(STATUS "Will create cached package file: ${package_file}")
|
||||
|
||||
ExternalProject_Add_Step(${project_name} package
|
||||
|
@ -233,17 +233,16 @@ foreach(_VERSION_ENTRY ${TOOLCHAIN_VERSIONS_TXT})
|
||||
endforeach()
|
||||
|
||||
if(CUSTOMIZATION)
|
||||
set(FAISS_SOURCE_URL "http://192.168.1.105:6060/jinhai/faiss/-/archive/${FAISS_VERSION}/faiss-${FAISS_VERSION}.tar.gz")
|
||||
# set(FAISS_MD5 "a589663865a8558205533c8ac414278c")
|
||||
# set(FAISS_MD5 "57da9c4f599cc8fa4260488b1c96e1cc") # commit-id 6dbdf75987c34a2c853bd172ea0d384feea8358c branch-0.2.0
|
||||
# set(FAISS_MD5 "21deb1c708490ca40ecb899122c01403") # commit-id 643e48f479637fd947e7b93fa4ca72b38ecc9a39 branch-0.2.0
|
||||
# set(FAISS_MD5 "072db398351cca6e88f52d743bbb9fa0") # commit-id 3a2344d04744166af41ef1a74449d68a315bfe17 branch-0.2.1
|
||||
set(FAISS_MD5 "c89ea8e655f5cdf58f42486f13614714") # commit-id 9c28a1cbb88f41fa03b03d7204106201ad33276b branch-0.2.1
|
||||
|
||||
execute_process(COMMAND wget -q --method HEAD ${FAISS_SOURCE_URL} RESULT_VARIABLE return_code)
|
||||
message(STATUS "Check the remote cache file ${FAISS_SOURCE_URL}. return code = ${return_code}")
|
||||
execute_process(COMMAND wget -q --method HEAD ${FAISS_URL} RESULT_VARIABLE return_code)
|
||||
message(STATUS "Check the remote cache file ${FAISS_URL}. return code = ${return_code}")
|
||||
if (NOT return_code EQUAL 0)
|
||||
set(FAISS_SOURCE_URL "https://github.com/facebookresearch/faiss/archive/v1.5.3.tar.gz")
|
||||
MESSAGE(FATAL_ERROR "Can't access to ${FAISS_URL}")
|
||||
else()
|
||||
# set(FAISS_MD5 "a589663865a8558205533c8ac414278c")
|
||||
# set(FAISS_MD5 "57da9c4f599cc8fa4260488b1c96e1cc") # commit-id 6dbdf75987c34a2c853bd172ea0d384feea8358c branch-0.2.0
|
||||
# set(FAISS_MD5 "21deb1c708490ca40ecb899122c01403") # commit-id 643e48f479637fd947e7b93fa4ca72b38ecc9a39 branch-0.2.0
|
||||
# set(FAISS_MD5 "072db398351cca6e88f52d743bbb9fa0") # commit-id 3a2344d04744166af41ef1a74449d68a315bfe17 branch-0.2.1
|
||||
set(FAISS_MD5 "c89ea8e655f5cdf58f42486f13614714") # commit-id 9c28a1cbb88f41fa03b03d7204106201ad33276b branch-0.2.1
|
||||
endif()
|
||||
else()
|
||||
set(FAISS_SOURCE_URL "https://github.com/facebookresearch/faiss/archive/v1.5.3.tar.gz")
|
||||
|
@ -1,7 +1,3 @@
|
||||
set(TBB_DIR ${CORE_SOURCE_DIR}/thirdparty/tbb)
|
||||
set(TBB_LIBRARIES ${TBB_DIR}/libtbb.so)
|
||||
include_directories(${TBB_DIR}/include)
|
||||
|
||||
include_directories(${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES})
|
||||
link_directories(${CUDA_TOOLKIT_ROOT_DIR}/lib64)
|
||||
|
||||
@ -60,7 +56,6 @@ set(index_srcs
|
||||
|
||||
set(depend_libs
|
||||
SPTAGLibStatic
|
||||
${TBB_LIBRARIES}
|
||||
faiss
|
||||
openblas
|
||||
lapack
|
||||
@ -107,13 +102,6 @@ INSTALL(FILES ${OPENBLAS_REAL_STATIC_LIB}
|
||||
DESTINATION lib
|
||||
)
|
||||
|
||||
INSTALL(FILES ${CORE_SOURCE_DIR}/thirdparty/tbb/libtbb.so.2
|
||||
DESTINATION lib
|
||||
)
|
||||
INSTALL(FILES ${CORE_SOURCE_DIR}/thirdparty/tbb/libtbb.so
|
||||
DESTINATION lib
|
||||
)
|
||||
|
||||
set(CORE_INCLUDE_DIRS
|
||||
${CORE_SOURCE_DIR}/knowhere
|
||||
${CORE_SOURCE_DIR}/thirdparty
|
||||
@ -122,7 +110,6 @@ set(CORE_INCLUDE_DIRS
|
||||
${FAISS_INCLUDE_DIR}
|
||||
${OPENBLAS_INCLUDE_DIR}
|
||||
${LAPACK_INCLUDE_DIR}
|
||||
${CORE_SOURCE_DIR}/thirdparty/tbb/include
|
||||
)
|
||||
|
||||
set(CORE_INCLUDE_DIRS ${CORE_INCLUDE_DIRS} PARENT_SCOPE)
|
||||
@ -132,7 +119,6 @@ set(CORE_INCLUDE_DIRS ${CORE_INCLUDE_DIRS} PARENT_SCOPE)
|
||||
# ${ARROW_INCLUDE_DIR}/arrow
|
||||
# ${FAISS_PREFIX}/include/faiss
|
||||
# ${OPENBLAS_INCLUDE_DIR}/
|
||||
# ${CORE_SOURCE_DIR}/thirdparty/tbb/include/tbb
|
||||
# DESTINATION
|
||||
# include)
|
||||
#
|
||||
|
@ -96,7 +96,7 @@ IVFSQHybrid::CopyCpuToGpu(const int64_t& device_id, const Config& config) {
|
||||
auto new_idx = std::make_shared<IVFSQHybrid>(device_index, device_id, res);
|
||||
return new_idx;
|
||||
} else {
|
||||
KNOWHERE_THROW_MSG("CopyCpuToGpu Error, can't get gpu_resource");
|
||||
KNOWHERE_THROW_MSG("CopyCpuToGpu Error, can't get gpu: " + std::to_string(gpu_id_) + "resource");
|
||||
}
|
||||
}
|
||||
|
||||
@ -152,7 +152,7 @@ IVFSQHybrid::LoadQuantizer(const Config& conf) {
|
||||
gpu_mode = 1;
|
||||
return q;
|
||||
} else {
|
||||
KNOWHERE_THROW_MSG("CopyCpuToGpu Error, can't get gpu_resource");
|
||||
KNOWHERE_THROW_MSG("CopyCpuToGpu Error, can't get gpu: " + std::to_string(gpu_id_) + "resource");
|
||||
}
|
||||
}
|
||||
|
||||
@ -215,7 +215,7 @@ IVFSQHybrid::LoadData(const knowhere::QuantizerPtr& q, const Config& conf) {
|
||||
auto sq_idx = std::make_shared<IVFSQHybrid>(new_idx, gpu_id_, res);
|
||||
return sq_idx;
|
||||
} else {
|
||||
KNOWHERE_THROW_MSG("CopyCpuToGpu Error, can't get gpu_resource");
|
||||
KNOWHERE_THROW_MSG("CopyCpuToGpu Error, can't get gpu: " + std::to_string(gpu_id_) + "resource");
|
||||
}
|
||||
}
|
||||
|
||||
@ -242,7 +242,7 @@ IVFSQHybrid::CopyCpuToGpuWithQuantizer(const int64_t& device_id, const Config& c
|
||||
q->size = index_composition.quantizer->d * index_composition.quantizer->getNumVecs() * sizeof(float);
|
||||
return std::make_pair(new_idx, q);
|
||||
} else {
|
||||
KNOWHERE_THROW_MSG("CopyCpuToGpu Error, can't get gpu_resource");
|
||||
KNOWHERE_THROW_MSG("CopyCpuToGpu Error, can't get gpu: " + std::to_string(gpu_id_) + "resource");
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -36,42 +36,47 @@ BinarySet
|
||||
CPUKDTRNG::Serialize() {
|
||||
std::vector<void*> index_blobs;
|
||||
std::vector<int64_t> index_len;
|
||||
index_ptr_->SaveIndexToMemory(index_blobs, index_len);
|
||||
|
||||
// TODO(zirui): dev
|
||||
// index_ptr_->SaveIndexToMemory(index_blobs, index_len);
|
||||
BinarySet binary_set;
|
||||
|
||||
auto sample = std::make_shared<uint8_t>();
|
||||
sample.reset(static_cast<uint8_t*>(index_blobs[0]));
|
||||
auto tree = std::make_shared<uint8_t>();
|
||||
tree.reset(static_cast<uint8_t*>(index_blobs[1]));
|
||||
auto graph = std::make_shared<uint8_t>();
|
||||
graph.reset(static_cast<uint8_t*>(index_blobs[2]));
|
||||
auto metadata = std::make_shared<uint8_t>();
|
||||
metadata.reset(static_cast<uint8_t*>(index_blobs[3]));
|
||||
|
||||
binary_set.Append("samples", sample, index_len[0]);
|
||||
binary_set.Append("tree", tree, index_len[1]);
|
||||
binary_set.Append("graph", graph, index_len[2]);
|
||||
binary_set.Append("metadata", metadata, index_len[3]);
|
||||
//
|
||||
// auto sample = std::make_shared<uint8_t>();
|
||||
// sample.reset(static_cast<uint8_t*>(index_blobs[0]));
|
||||
// auto tree = std::make_shared<uint8_t>();
|
||||
// tree.reset(static_cast<uint8_t*>(index_blobs[1]));
|
||||
// auto graph = std::make_shared<uint8_t>();
|
||||
// graph.reset(static_cast<uint8_t*>(index_blobs[2]));
|
||||
// auto metadata = std::make_shared<uint8_t>();
|
||||
// metadata.reset(static_cast<uint8_t*>(index_blobs[3]));
|
||||
//
|
||||
// binary_set.Append("samples", sample, index_len[0]);
|
||||
// binary_set.Append("tree", tree, index_len[1]);
|
||||
// binary_set.Append("graph", graph, index_len[2]);
|
||||
// binary_set.Append("metadata", metadata, index_len[3]);
|
||||
return binary_set;
|
||||
}
|
||||
|
||||
void
|
||||
CPUKDTRNG::Load(const BinarySet& binary_set) {
|
||||
std::vector<void*> index_blobs;
|
||||
// TODO(zirui): dev
|
||||
|
||||
auto samples = binary_set.GetByName("samples");
|
||||
index_blobs.push_back(samples->data.get());
|
||||
|
||||
auto tree = binary_set.GetByName("tree");
|
||||
index_blobs.push_back(tree->data.get());
|
||||
|
||||
auto graph = binary_set.GetByName("graph");
|
||||
index_blobs.push_back(graph->data.get());
|
||||
|
||||
auto metadata = binary_set.GetByName("metadata");
|
||||
index_blobs.push_back(metadata->data.get());
|
||||
|
||||
index_ptr_->LoadIndexFromMemory(index_blobs);
|
||||
// std::vector<void*> index_blobs;
|
||||
//
|
||||
// auto samples = binary_set.GetByName("samples");
|
||||
// index_blobs.push_back(samples->data.get());
|
||||
//
|
||||
// auto tree = binary_set.GetByName("tree");
|
||||
// index_blobs.push_back(tree->data.get());
|
||||
//
|
||||
// auto graph = binary_set.GetByName("graph");
|
||||
// index_blobs.push_back(graph->data.get());
|
||||
//
|
||||
// auto metadata = binary_set.GetByName("metadata");
|
||||
// index_blobs.push_back(metadata->data.get());
|
||||
//
|
||||
// index_ptr_->LoadIndexFromMemory(index_blobs);
|
||||
}
|
||||
|
||||
// PreprocessorPtr
|
||||
|
2
core/src/index/thirdparty/SPTAG/.gitignore
vendored
2
core/src/index/thirdparty/SPTAG/.gitignore
vendored
@ -89,5 +89,3 @@ dkms.conf
|
||||
/Wrappers/inc/AnnClient.java
|
||||
/AnnService.users - Copy.props
|
||||
/.vs
|
||||
Release/
|
||||
Debug/
|
||||
|
@ -1,44 +1,41 @@
|
||||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
# Licensed under the MIT License.
|
||||
|
||||
file(GLOB HDR_FILES ${PROJECT_SOURCE_DIR}/AnnService/inc/Core/*.h ${PROJECT_SOURCE_DIR}/AnnService/inc/Core/Common/*.h ${PROJECT_SOURCE_DIR}/AnnService/inc/Core/BKT/*.h ${PROJECT_SOURCE_DIR}/AnnService/inc/Core/KDT/*.h ${PROJECT_SOURCE_DIR}/AnnService/inc/Helper/*.h)
|
||||
file(GLOB SRC_FILES ${PROJECT_SOURCE_DIR}/AnnService/src/Core/*.cpp ${PROJECT_SOURCE_DIR}/AnnService/src/Core/Common/*.cpp ${PROJECT_SOURCE_DIR}/AnnService/src/Core/BKT/*.cpp ${PROJECT_SOURCE_DIR}/AnnService/src/Core/KDT/*.cpp ${PROJECT_SOURCE_DIR}/AnnService/src/Helper/*.cpp)
|
||||
file(GLOB HDR_FILES ${PROJECT_SOURCE_DIR}/AnnService/inc/Core/*.h ${PROJECT_SOURCE_DIR}/AnnService/inc/Core/Common/*.h ${PROJECT_SOURCE_DIR}/AnnService/inc/Core/BKT/*.h ${PROJECT_SOURCE_DIR}/AnnService/inc/Core/KDT/*.h ${PROJECT_SOURCE_DIR}/AnnService/inc/Helper/*.h ${PROJECT_SOURCE_DIR}/AnnService/inc/Helper/VectorSetReaders/*.h)
|
||||
file(GLOB SRC_FILES ${PROJECT_SOURCE_DIR}/AnnService/src/Core/*.cpp ${PROJECT_SOURCE_DIR}/AnnService/src/Core/Common/*.cpp ${PROJECT_SOURCE_DIR}/AnnService/src/Core/BKT/*.cpp ${PROJECT_SOURCE_DIR}/AnnService/src/Core/KDT/*.cpp ${PROJECT_SOURCE_DIR}/AnnService/src/Helper/*.cpp ${PROJECT_SOURCE_DIR}/AnnService/src/Helper/VectorSetReaders/*.cpp)
|
||||
|
||||
include_directories(${PROJECT_SOURCE_DIR}/AnnService)
|
||||
|
||||
add_library (SPTAGLib SHARED ${SRC_FILES} ${HDR_FILES})
|
||||
target_link_libraries (SPTAGLib ${TBB_LIBRARIES})
|
||||
target_link_libraries (SPTAGLib)
|
||||
add_library (SPTAGLibStatic STATIC ${SRC_FILES} ${HDR_FILES})
|
||||
set_target_properties(SPTAGLibStatic PROPERTIES OUTPUT_NAME SPTAGLib)
|
||||
|
||||
file(GLOB SERVER_HDR_FILES ${HDR_FILES} ${PROJECT_SOURCE_DIR}/AnnService/inc/Server/*.h ${PROJECT_SOURCE_DIR}/AnnService/inc/Socket/*.h)
|
||||
file(GLOB SERVER_FILES ${SRC_FILES} ${PROJECT_SOURCE_DIR}/AnnService/src/Server/*.cpp ${PROJECT_SOURCE_DIR}/AnnService/src/Socket/*.cpp)
|
||||
add_executable (server ${SERVER_FILES} ${SERVER_HDR_FILES})
|
||||
target_link_libraries(server ${Boost_LIBRARIES} ${TBB_LIBRARIES})
|
||||
target_link_libraries(server ${Boost_LIBRARIES})
|
||||
|
||||
file(GLOB CLIENT_HDR_FILES ${HDR_FILES} ${PROJECT_SOURCE_DIR}/AnnService/inc/Client/*.h ${PROJECT_SOURCE_DIR}/AnnService/inc/Socket/*.h)
|
||||
file(GLOB CLIENT_FILES ${SRC_FILES} ${PROJECT_SOURCE_DIR}/AnnService/src/Client/*.cpp ${PROJECT_SOURCE_DIR}/AnnService/src/Socket/*.cpp)
|
||||
add_executable (client ${CLIENT_FILES} ${CLIENT_HDR_FILES})
|
||||
target_link_libraries(client ${Boost_LIBRARIES} ${TBB_LIBRARIES})
|
||||
target_link_libraries(client ${Boost_LIBRARIES})
|
||||
|
||||
file(GLOB AGG_HDR_FILES ${HDR_FILES} ${PROJECT_SOURCE_DIR}/AnnService/inc/Aggregator/*.h ${PROJECT_SOURCE_DIR}/AnnService/inc/Socket/*.h)
|
||||
file(GLOB AGG_FILES ${SRC_FILES} ${PROJECT_SOURCE_DIR}/AnnService/src/Aggregator/*.cpp ${PROJECT_SOURCE_DIR}/AnnService/src/Socket/*.cpp)
|
||||
add_executable (aggregator ${AGG_FILES} ${AGG_HDR_FILES})
|
||||
target_link_libraries(aggregator ${Boost_LIBRARIES} ${TBB_LIBRARIES})
|
||||
target_link_libraries(aggregator ${Boost_LIBRARIES})
|
||||
|
||||
file(GLOB BUILDER_HDR_FILES ${HDR_FILES} ${PROJECT_SOURCE_DIR}/AnnService/inc/IndexBuilder/*.h ${PROJECT_SOURCE_DIR}/AnnService/inc/IndexBuilder/VectorSetReaders/*.h)
|
||||
file(GLOB BUILDER_FILES ${SRC_FILES} ${PROJECT_SOURCE_DIR}/AnnService/src/IndexBuilder/*.cpp ${PROJECT_SOURCE_DIR}/AnnService/src/IndexBuilder/VectorSetReaders/*.cpp)
|
||||
file(GLOB BUILDER_HDR_FILES ${HDR_FILES} ${PROJECT_SOURCE_DIR}/AnnService/inc/IndexBuilder/*.h)
|
||||
file(GLOB BUILDER_FILES ${SRC_FILES} ${PROJECT_SOURCE_DIR}/AnnService/src/IndexBuilder/*.cpp)
|
||||
add_executable (indexbuilder ${BUILDER_FILES} ${BUILDER_HDR_FILES})
|
||||
target_link_libraries(indexbuilder ${Boost_LIBRARIES} ${TBB_LIBRARIES})
|
||||
target_link_libraries(indexbuilder ${Boost_LIBRARIES})
|
||||
|
||||
file(GLOB SEARCHER_FILES ${SRC_FILES} ${PROJECT_SOURCE_DIR}/AnnService/src/IndexSearcher/*.cpp)
|
||||
add_executable (indexsearcher ${SEARCHER_FILES} ${HDR_FILES})
|
||||
target_link_libraries(indexsearcher ${Boost_LIBRARIES} ${TBB_LIBRARIES})
|
||||
target_link_libraries(indexsearcher ${Boost_LIBRARIES})
|
||||
|
||||
install(TARGETS SPTAGLib SPTAGLibStatic server client aggregator indexbuilder indexsearcher
|
||||
RUNTIME DESTINATION bin
|
||||
ARCHIVE DESTINATION lib
|
||||
LIBRARY DESTINATION lib)
|
||||
|
||||
install(DIRECTORY inc DESTINATION include/sptag
|
||||
FILES_MATCHING PATTERN "*.h")
|
@ -149,25 +149,29 @@
|
||||
<ClInclude Include="inc\Core\DefinitionList.h" />
|
||||
<ClInclude Include="inc\Core\MetadataSet.h" />
|
||||
<ClInclude Include="inc\Core\SearchQuery.h" />
|
||||
<ClInclude Include="inc\Core\SearchResult.h" />
|
||||
<ClInclude Include="inc\Core\VectorIndex.h" />
|
||||
<ClInclude Include="inc\Core\VectorSet.h" />
|
||||
<ClInclude Include="inc\Helper\ArgumentsParser.h" />
|
||||
<ClInclude Include="inc\Helper\Base64Encode.h" />
|
||||
<ClInclude Include="inc\Helper\BufferStream.h" />
|
||||
<ClInclude Include="inc\Helper\CommonHelper.h" />
|
||||
<ClInclude Include="inc\Helper\Concurrent.h" />
|
||||
<ClInclude Include="inc\Helper\ConcurrentSet.h" />
|
||||
<ClInclude Include="inc\Helper\SimpleIniReader.h" />
|
||||
<ClInclude Include="inc\Helper\StringConvert.h" />
|
||||
<ClInclude Include="inc\Core\Common\NeighborhoodGraph.h" />
|
||||
<ClInclude Include="inc\Core\Common\RelativeNeighborhoodGraph.h" />
|
||||
<ClInclude Include="inc\Core\Common\BKTree.h" />
|
||||
<ClInclude Include="inc\Core\Common\KDTree.h" />
|
||||
<ClInclude Include="inc\Helper\VectorSetReader.h" />
|
||||
<ClInclude Include="inc\Helper\VectorSetReaders\DefaultReader.h" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="src\Core\BKT\BKTIndex.cpp" />
|
||||
<ClCompile Include="src\Core\Common\NeighborhoodGraph.cpp" />
|
||||
<ClCompile Include="src\Core\KDT\KDTIndex.cpp" />
|
||||
<ClCompile Include="src\Core\Common\WorkSpacePool.cpp" />
|
||||
<ClCompile Include="src\Core\CommonDataStructure.cpp" />
|
||||
<ClCompile Include="src\Core\MetadataSet.cpp" />
|
||||
<ClCompile Include="src\Core\VectorIndex.cpp" />
|
||||
<ClCompile Include="src\Core\VectorSet.cpp" />
|
||||
@ -176,18 +180,13 @@
|
||||
<ClCompile Include="src\Helper\CommonHelper.cpp" />
|
||||
<ClCompile Include="src\Helper\Concurrent.cpp" />
|
||||
<ClCompile Include="src\Helper\SimpleIniReader.cpp" />
|
||||
<ClCompile Include="src\Helper\VectorSetReader.cpp" />
|
||||
<ClCompile Include="src\Helper\VectorSetReaders\DefaultReader.cpp" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<None Include="packages.config" />
|
||||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="..\packages\tbb_oss.9.107.0.0\build\native\tbb_oss.targets" Condition="Exists('..\packages\tbb_oss.9.107.0.0\build\native\tbb_oss.targets')" />
|
||||
</ImportGroup>
|
||||
<Target Name="EnsureNuGetPackageBuildImports" BeforeTargets="PrepareForBuild">
|
||||
<PropertyGroup>
|
||||
<ErrorText>This project references NuGet package(s) that are missing on this computer. Use NuGet Package Restore to download them. For more information, see http://go.microsoft.com/fwlink/?LinkID=322105. The missing file is {0}.</ErrorText>
|
||||
</PropertyGroup>
|
||||
<Error Condition="!Exists('..\packages\tbb_oss.9.107.0.0\build\native\tbb_oss.targets')" Text="$([System.String]::Format('$(ErrorText)', '..\packages\tbb_oss.9.107.0.0\build\native\tbb_oss.targets'))" />
|
||||
</Target>
|
||||
</Project>
|
@ -38,6 +38,12 @@
|
||||
<Filter Include="Source Files\Core\KDT">
|
||||
<UniqueIdentifier>{8fb36afb-73ed-4c3d-8c9b-c3581d80c5d1}</UniqueIdentifier>
|
||||
</Filter>
|
||||
<Filter Include="Header Files\Helper\VectorSetReaders">
|
||||
<UniqueIdentifier>{f7bc0bc7-1af5-4870-b8ee-fabdbabdb4c4}</UniqueIdentifier>
|
||||
</Filter>
|
||||
<Filter Include="Source Files\Helper\VectorSetReaders">
|
||||
<UniqueIdentifier>{5c1449e0-38b7-4c82-976e-cbdc488d3fb5}</UniqueIdentifier>
|
||||
</Filter>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClInclude Include="inc\Core\Common.h">
|
||||
@ -52,6 +58,9 @@
|
||||
<ClInclude Include="inc\Core\SearchQuery.h">
|
||||
<Filter>Header Files\Core</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="inc\Core\SearchResult.h">
|
||||
<Filter>Header Files\Core</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="inc\Core\VectorIndex.h">
|
||||
<Filter>Header Files\Core</Filter>
|
||||
</ClInclude>
|
||||
@ -130,11 +139,20 @@
|
||||
<ClInclude Include="inc\Core\Common\BKTree.h">
|
||||
<Filter>Header Files\Core\Common</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="inc\Helper\ConcurrentSet.h">
|
||||
<Filter>Header Files\Helper</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="inc\Helper\BufferStream.h">
|
||||
<Filter>Header Files\Helper</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="inc\Helper\VectorSetReaders\DefaultReader.h">
|
||||
<Filter>Header Files\Helper\VectorSetReaders</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="inc\Helper\VectorSetReader.h">
|
||||
<Filter>Header Files\Helper</Filter>
|
||||
</ClInclude>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="src\Core\CommonDataStructure.cpp">
|
||||
<Filter>Source Files\Core</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="src\Core\VectorIndex.cpp">
|
||||
<Filter>Source Files\Core</Filter>
|
||||
</ClCompile>
|
||||
@ -171,6 +189,12 @@
|
||||
<ClCompile Include="src\Core\Common\NeighborhoodGraph.cpp">
|
||||
<Filter>Source Files\Core\Common</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="src\Helper\VectorSetReaders\DefaultReader.cpp">
|
||||
<Filter>Source Files\Helper\VectorSetReaders</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="src\Helper\VectorSetReader.cpp">
|
||||
<Filter>Source Files\Helper</Filter>
|
||||
</ClCompile>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<None Include="packages.config" />
|
||||
|
@ -139,15 +139,11 @@
|
||||
<ItemGroup>
|
||||
<ClInclude Include="inc\IndexBuilder\Options.h" />
|
||||
<ClInclude Include="inc\IndexBuilder\ThreadPool.h" />
|
||||
<ClInclude Include="inc\IndexBuilder\VectorSetReader.h" />
|
||||
<ClInclude Include="inc\IndexBuilder\VectorSetReaders\DefaultReader.h" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="src\IndexBuilder\main.cpp" />
|
||||
<ClCompile Include="src\IndexBuilder\Options.cpp" />
|
||||
<ClCompile Include="src\IndexBuilder\ThreadPool.cpp" />
|
||||
<ClCompile Include="src\IndexBuilder\VectorSetReader.cpp" />
|
||||
<ClCompile Include="src\IndexBuilder\VectorSetReaders\DefaultReader.cpp" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<None Include="packages.config" />
|
||||
@ -161,7 +157,6 @@
|
||||
<Import Project="..\packages\boost_system-vc140.1.67.0.0\build\boost_system-vc140.targets" Condition="Exists('..\packages\boost_system-vc140.1.67.0.0\build\boost_system-vc140.targets')" />
|
||||
<Import Project="..\packages\boost_thread-vc140.1.67.0.0\build\boost_thread-vc140.targets" Condition="Exists('..\packages\boost_thread-vc140.1.67.0.0\build\boost_thread-vc140.targets')" />
|
||||
<Import Project="..\packages\boost_wserialization-vc140.1.67.0.0\build\boost_wserialization-vc140.targets" Condition="Exists('..\packages\boost_wserialization-vc140.1.67.0.0\build\boost_wserialization-vc140.targets')" />
|
||||
<Import Project="..\packages\tbb_oss.9.107.0.0\build\native\tbb_oss.targets" Condition="Exists('..\packages\tbb_oss.9.107.0.0\build\native\tbb_oss.targets')" />
|
||||
</ImportGroup>
|
||||
<Target Name="EnsureNuGetPackageBuildImports" BeforeTargets="PrepareForBuild">
|
||||
<PropertyGroup>
|
||||
@ -174,6 +169,5 @@
|
||||
<Error Condition="!Exists('..\packages\boost_system-vc140.1.67.0.0\build\boost_system-vc140.targets')" Text="$([System.String]::Format('$(ErrorText)', '..\packages\boost_system-vc140.1.67.0.0\build\boost_system-vc140.targets'))" />
|
||||
<Error Condition="!Exists('..\packages\boost_thread-vc140.1.67.0.0\build\boost_thread-vc140.targets')" Text="$([System.String]::Format('$(ErrorText)', '..\packages\boost_thread-vc140.1.67.0.0\build\boost_thread-vc140.targets'))" />
|
||||
<Error Condition="!Exists('..\packages\boost_wserialization-vc140.1.67.0.0\build\boost_wserialization-vc140.targets')" Text="$([System.String]::Format('$(ErrorText)', '..\packages\boost_wserialization-vc140.1.67.0.0\build\boost_wserialization-vc140.targets'))" />
|
||||
<Error Condition="!Exists('..\packages\tbb_oss.9.107.0.0\build\native\tbb_oss.targets')" Text="$([System.String]::Format('$(ErrorText)', '..\packages\tbb_oss.9.107.0.0\build\native\tbb_oss.targets'))" />
|
||||
</Target>
|
||||
</Project>
|
@ -1,4 +1,4 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<ItemGroup>
|
||||
<Filter Include="Source Files">
|
||||
@ -9,12 +9,6 @@
|
||||
<UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
|
||||
<Extensions>h;hh;hpp;hxx;hm;inl;inc;xsd</Extensions>
|
||||
</Filter>
|
||||
<Filter Include="Header Files\VectorSetReaders">
|
||||
<UniqueIdentifier>{cf68b421-6a65-44f2-bf43-438b13940d7d}</UniqueIdentifier>
|
||||
</Filter>
|
||||
<Filter Include="Source Files\VectorSetReaders">
|
||||
<UniqueIdentifier>{41ac91f9-6b6d-4341-8791-12f672d6ad5c}</UniqueIdentifier>
|
||||
</Filter>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClInclude Include="inc\IndexBuilder\Options.h">
|
||||
@ -23,28 +17,16 @@
|
||||
<ClInclude Include="inc\IndexBuilder\ThreadPool.h">
|
||||
<Filter>Header Files</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="inc\IndexBuilder\VectorSetReader.h">
|
||||
<Filter>Header Files</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="inc\IndexBuilder\VectorSetReaders\DefaultReader.h">
|
||||
<Filter>Header Files\VectorSetReaders</Filter>
|
||||
</ClInclude>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="src\IndexBuilder\Options.cpp">
|
||||
<Filter>Source Files</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="src\IndexBuilder\ThreadPool.cpp">
|
||||
<Filter>Source Files</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="src\IndexBuilder\VectorSetReader.cpp">
|
||||
<Filter>Source Files</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="src\IndexBuilder\VectorSetReaders\DefaultReader.cpp">
|
||||
<Filter>Source Files\VectorSetReaders</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="src\IndexBuilder\main.cpp">
|
||||
<Filter>Source Files</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="src\IndexBuilder\ThreadPool.cpp">
|
||||
<Filter>Source Files</Filter>
|
||||
</ClCompile>
|
||||
</ItemGroup>
|
||||
</Project>
|
@ -154,7 +154,6 @@
|
||||
<Import Project="..\packages\boost_system-vc140.1.67.0.0\build\boost_system-vc140.targets" Condition="Exists('..\packages\boost_system-vc140.1.67.0.0\build\boost_system-vc140.targets')" />
|
||||
<Import Project="..\packages\boost_thread-vc140.1.67.0.0\build\boost_thread-vc140.targets" Condition="Exists('..\packages\boost_thread-vc140.1.67.0.0\build\boost_thread-vc140.targets')" />
|
||||
<Import Project="..\packages\boost_wserialization-vc140.1.67.0.0\build\boost_wserialization-vc140.targets" Condition="Exists('..\packages\boost_wserialization-vc140.1.67.0.0\build\boost_wserialization-vc140.targets')" />
|
||||
<Import Project="..\packages\tbb_oss.9.107.0.0\build\native\tbb_oss.targets" Condition="Exists('..\packages\tbb_oss.9.107.0.0\build\native\tbb_oss.targets')" />
|
||||
</ImportGroup>
|
||||
<Target Name="EnsureNuGetPackageBuildImports" BeforeTargets="PrepareForBuild">
|
||||
<PropertyGroup>
|
||||
@ -167,6 +166,5 @@
|
||||
<Error Condition="!Exists('..\packages\boost_system-vc140.1.67.0.0\build\boost_system-vc140.targets')" Text="$([System.String]::Format('$(ErrorText)', '..\packages\boost_system-vc140.1.67.0.0\build\boost_system-vc140.targets'))" />
|
||||
<Error Condition="!Exists('..\packages\boost_thread-vc140.1.67.0.0\build\boost_thread-vc140.targets')" Text="$([System.String]::Format('$(ErrorText)', '..\packages\boost_thread-vc140.1.67.0.0\build\boost_thread-vc140.targets'))" />
|
||||
<Error Condition="!Exists('..\packages\boost_wserialization-vc140.1.67.0.0\build\boost_wserialization-vc140.targets')" Text="$([System.String]::Format('$(ErrorText)', '..\packages\boost_wserialization-vc140.1.67.0.0\build\boost_wserialization-vc140.targets'))" />
|
||||
<Error Condition="!Exists('..\packages\tbb_oss.9.107.0.0\build\native\tbb_oss.targets')" Text="$([System.String]::Format('$(ErrorText)', '..\packages\tbb_oss.9.107.0.0\build\native\tbb_oss.targets'))" />
|
||||
</Target>
|
||||
</Project>
|
@ -137,7 +137,6 @@
|
||||
<Import Project="..\packages\boost_system-vc140.1.67.0.0\build\boost_system-vc140.targets" Condition="Exists('..\packages\boost_system-vc140.1.67.0.0\build\boost_system-vc140.targets')" />
|
||||
<Import Project="..\packages\boost_thread-vc140.1.67.0.0\build\boost_thread-vc140.targets" Condition="Exists('..\packages\boost_thread-vc140.1.67.0.0\build\boost_thread-vc140.targets')" />
|
||||
<Import Project="..\packages\boost_wserialization-vc140.1.67.0.0\build\boost_wserialization-vc140.targets" Condition="Exists('..\packages\boost_wserialization-vc140.1.67.0.0\build\boost_wserialization-vc140.targets')" />
|
||||
<Import Project="..\packages\tbb_oss.9.107.0.0\build\native\tbb_oss.targets" Condition="Exists('..\packages\tbb_oss.9.107.0.0\build\native\tbb_oss.targets')" />
|
||||
</ImportGroup>
|
||||
<Target Name="EnsureNuGetPackageBuildImports" BeforeTargets="PrepareForBuild">
|
||||
<PropertyGroup>
|
||||
@ -150,6 +149,5 @@
|
||||
<Error Condition="!Exists('..\packages\boost_system-vc140.1.67.0.0\build\boost_system-vc140.targets')" Text="$([System.String]::Format('$(ErrorText)', '..\packages\boost_system-vc140.1.67.0.0\build\boost_system-vc140.targets'))" />
|
||||
<Error Condition="!Exists('..\packages\boost_thread-vc140.1.67.0.0\build\boost_thread-vc140.targets')" Text="$([System.String]::Format('$(ErrorText)', '..\packages\boost_thread-vc140.1.67.0.0\build\boost_thread-vc140.targets'))" />
|
||||
<Error Condition="!Exists('..\packages\boost_wserialization-vc140.1.67.0.0\build\boost_wserialization-vc140.targets')" Text="$([System.String]::Format('$(ErrorText)', '..\packages\boost_wserialization-vc140.1.67.0.0\build\boost_wserialization-vc140.targets'))" />
|
||||
<Error Condition="!Exists('..\packages\tbb_oss.9.107.0.0\build\native\tbb_oss.targets')" Text="$([System.String]::Format('$(ErrorText)', '..\packages\tbb_oss.9.107.0.0\build\native\tbb_oss.targets'))" />
|
||||
</Target>
|
||||
</Project>
|
@ -15,12 +15,12 @@
|
||||
#include "../Common/WorkSpacePool.h"
|
||||
#include "../Common/RelativeNeighborhoodGraph.h"
|
||||
#include "../Common/BKTree.h"
|
||||
#include "inc/Helper/ConcurrentSet.h"
|
||||
#include "inc/Helper/SimpleIniReader.h"
|
||||
#include "inc/Helper/StringConvert.h"
|
||||
|
||||
#include <functional>
|
||||
#include <mutex>
|
||||
#include <tbb/concurrent_unordered_set.h>
|
||||
|
||||
namespace SPTAG
|
||||
{
|
||||
@ -48,35 +48,38 @@ namespace SPTAG
|
||||
std::string m_sBKTFilename;
|
||||
std::string m_sGraphFilename;
|
||||
std::string m_sDataPointsFilename;
|
||||
std::string m_sDeleteDataPointsFilename;
|
||||
|
||||
std::mutex m_dataLock; // protect data and graph
|
||||
tbb::concurrent_unordered_set<int> m_deletedID;
|
||||
std::mutex m_dataAddLock; // protect data and graph
|
||||
Helper::Concurrent::ConcurrentSet<SizeType> m_deletedID;
|
||||
float m_fDeletePercentageForRefine;
|
||||
std::unique_ptr<COMMON::WorkSpacePool> m_workSpacePool;
|
||||
|
||||
int m_iNumberOfThreads;
|
||||
DistCalcMethod m_iDistCalcMethod;
|
||||
float(*m_fComputeDistance)(const T* pX, const T* pY, int length);
|
||||
float(*m_fComputeDistance)(const T* pX, const T* pY, DimensionType length);
|
||||
|
||||
int m_iMaxCheck;
|
||||
int m_iThresholdOfNumberOfContinuousNoBetterPropagation;
|
||||
int m_iNumberOfInitialDynamicPivots;
|
||||
int m_iNumberOfOtherDynamicPivots;
|
||||
public:
|
||||
Index()
|
||||
{
|
||||
Index()
|
||||
{
|
||||
#define DefineBKTParameter(VarName, VarType, DefaultValue, RepresentStr) \
|
||||
VarName = DefaultValue; \
|
||||
|
||||
#include "inc/Core/BKT/ParameterDefinitionList.h"
|
||||
#undef DefineBKTParameter
|
||||
|
||||
m_fComputeDistance = COMMON::DistanceCalcSelector<T>(m_iDistCalcMethod);
|
||||
}
|
||||
m_pSamples.SetName("Vector");
|
||||
m_fComputeDistance = COMMON::DistanceCalcSelector<T>(m_iDistCalcMethod);
|
||||
}
|
||||
|
||||
~Index() {}
|
||||
|
||||
inline int GetNumSamples() const { return m_pSamples.R(); }
|
||||
inline int GetFeatureDim() const { return m_pSamples.C(); }
|
||||
inline SizeType GetNumSamples() const { return m_pSamples.R(); }
|
||||
inline DimensionType GetFeatureDim() const { return m_pSamples.C(); }
|
||||
|
||||
inline int GetCurrMaxCheck() const { return m_iMaxCheck; }
|
||||
inline int GetNumThreads() const { return m_iNumberOfThreads; }
|
||||
@ -85,25 +88,41 @@ namespace SPTAG
|
||||
inline VectorValueType GetVectorValueType() const { return GetEnumValueType<T>(); }
|
||||
|
||||
inline float ComputeDistance(const void* pX, const void* pY) const { return m_fComputeDistance((const T*)pX, (const T*)pY, m_pSamples.C()); }
|
||||
inline const void* GetSample(const int idx) const { return (void*)m_pSamples[idx]; }
|
||||
inline const void* GetSample(const SizeType idx) const { return (void*)m_pSamples[idx]; }
|
||||
inline bool ContainSample(const SizeType idx) const { return !m_deletedID.contains(idx); }
|
||||
inline bool NeedRefine() const { return m_deletedID.size() >= (size_t)(GetNumSamples() * m_fDeletePercentageForRefine); }
|
||||
std::shared_ptr<std::vector<std::uint64_t>> BufferSize() const
|
||||
{
|
||||
std::shared_ptr<std::vector<std::uint64_t>> buffersize(new std::vector<std::uint64_t>);
|
||||
buffersize->push_back(m_pSamples.BufferSize());
|
||||
buffersize->push_back(m_pTrees.BufferSize());
|
||||
buffersize->push_back(m_pGraph.BufferSize());
|
||||
buffersize->push_back(m_deletedID.bufferSize());
|
||||
return std::move(buffersize);
|
||||
}
|
||||
|
||||
ErrorCode BuildIndex(const void* p_data, int p_vectorNum, int p_dimension);
|
||||
ErrorCode SaveConfig(std::ostream& p_configout) const;
|
||||
ErrorCode SaveIndexData(const std::string& p_folderPath);
|
||||
ErrorCode SaveIndexData(const std::vector<std::ostream*>& p_indexStreams);
|
||||
|
||||
ErrorCode SaveIndexToMemory(std::vector<void*>& p_indexBlobs, std::vector<int64_t>& p_indexBlobsLen);
|
||||
ErrorCode LoadIndexFromMemory(const std::vector<void*>& p_indexBlobs);
|
||||
ErrorCode LoadConfig(Helper::IniReader& p_reader);
|
||||
ErrorCode LoadIndexData(const std::string& p_folderPath);
|
||||
ErrorCode LoadIndexDataFromMemory(const std::vector<ByteArray>& p_indexBlobs);
|
||||
|
||||
ErrorCode SaveIndex(const std::string& p_folderPath, std::ofstream& p_configout);
|
||||
ErrorCode LoadIndex(const std::string& p_folderPath, Helper::IniReader& p_reader);
|
||||
ErrorCode BuildIndex(const void* p_data, SizeType p_vectorNum, DimensionType p_dimension);
|
||||
ErrorCode SearchIndex(QueryResult &p_query) const;
|
||||
ErrorCode AddIndex(const void* p_vectors, int p_vectorNum, int p_dimension);
|
||||
ErrorCode DeleteIndex(const void* p_vectors, int p_vectorNum);
|
||||
ErrorCode AddIndex(const void* p_vectors, SizeType p_vectorNum, DimensionType p_dimension, SizeType* p_start = nullptr);
|
||||
ErrorCode DeleteIndex(const void* p_vectors, SizeType p_vectorNum);
|
||||
ErrorCode DeleteIndex(const SizeType& p_id);
|
||||
|
||||
ErrorCode SetParameter(const char* p_param, const char* p_value);
|
||||
std::string GetParameter(const char* p_param) const;
|
||||
|
||||
private:
|
||||
ErrorCode RefineIndex(const std::string& p_folderPath);
|
||||
void SearchIndexWithDeleted(COMMON::QueryResultSet<T> &p_query, COMMON::WorkSpace &p_space, const tbb::concurrent_unordered_set<int> &p_deleted) const;
|
||||
ErrorCode RefineIndex(const std::vector<std::ostream*>& p_indexStreams);
|
||||
|
||||
private:
|
||||
void SearchIndexWithDeleted(COMMON::QueryResultSet<T> &p_query, COMMON::WorkSpace &p_space, const Helper::Concurrent::ConcurrentSet<SizeType> &p_deleted) const;
|
||||
void SearchIndexWithoutDeleted(COMMON::QueryResultSet<T> &p_query, COMMON::WorkSpace &p_space) const;
|
||||
};
|
||||
} // namespace BKT
|
||||
|
@ -7,6 +7,7 @@
|
||||
DefineBKTParameter(m_sBKTFilename, std::string, std::string("tree.bin"), "TreeFilePath")
|
||||
DefineBKTParameter(m_sGraphFilename, std::string, std::string("graph.bin"), "GraphFilePath")
|
||||
DefineBKTParameter(m_sDataPointsFilename, std::string, std::string("vectors.bin"), "VectorFilePath")
|
||||
DefineBKTParameter(m_sDeleteDataPointsFilename, std::string, std::string("deletes.bin"), "DeleteVectorFilePath")
|
||||
|
||||
DefineBKTParameter(m_pTrees.m_iTreeNumber, int, 1L, "BKTNumber")
|
||||
DefineBKTParameter(m_pTrees.m_iBKTKmeansK, int, 32L, "BKTKmeansK")
|
||||
@ -14,11 +15,11 @@ DefineBKTParameter(m_pTrees.m_iBKTLeafSize, int, 8L, "BKTLeafSize")
|
||||
DefineBKTParameter(m_pTrees.m_iSamples, int, 1000L, "Samples")
|
||||
|
||||
|
||||
DefineBKTParameter(m_pGraph.m_iTPTNumber, int, 32L, "TpTreeNumber")
|
||||
DefineBKTParameter(m_pGraph.m_iTPTNumber, int, 32L, "TPTNumber")
|
||||
DefineBKTParameter(m_pGraph.m_iTPTLeafSize, int, 2000L, "TPTLeafSize")
|
||||
DefineBKTParameter(m_pGraph.m_numTopDimensionTPTSplit, int, 5L, "NumTopDimensionTpTreeSplit")
|
||||
|
||||
DefineBKTParameter(m_pGraph.m_iNeighborhoodSize, int, 32L, "NeighborhoodSize")
|
||||
DefineBKTParameter(m_pGraph.m_iNeighborhoodSize, DimensionType, 32L, "NeighborhoodSize")
|
||||
DefineBKTParameter(m_pGraph.m_iNeighborhoodScale, int, 2L, "GraphNeighborhoodScale")
|
||||
DefineBKTParameter(m_pGraph.m_iCEFScale, int, 2L, "GraphCEFScale")
|
||||
DefineBKTParameter(m_pGraph.m_iRefineIter, int, 0L, "RefineIterations")
|
||||
@ -28,6 +29,7 @@ DefineBKTParameter(m_pGraph.m_iMaxCheckForRefineGraph, int, 10000L, "MaxCheckFor
|
||||
DefineBKTParameter(m_iNumberOfThreads, int, 1L, "NumberOfThreads")
|
||||
DefineBKTParameter(m_iDistCalcMethod, SPTAG::DistCalcMethod, SPTAG::DistCalcMethod::Cosine, "DistCalcMethod")
|
||||
|
||||
DefineBKTParameter(m_fDeletePercentageForRefine, float, 0.4F, "DeletePercentageForRefine")
|
||||
DefineBKTParameter(m_iMaxCheck, int, 8192L, "MaxCheck")
|
||||
DefineBKTParameter(m_iThresholdOfNumberOfContinuousNoBetterPropagation, int, 3L, "ThresholdOfNumberOfContinuousNoBetterPropagation")
|
||||
DefineBKTParameter(m_iNumberOfInitialDynamicPivots, int, 50L, "NumberOfInitialDynamicPivots")
|
||||
|
@ -56,9 +56,10 @@ inline bool fileexists(const char* path) {
|
||||
|
||||
namespace SPTAG
|
||||
{
|
||||
typedef std::int32_t SizeType;
|
||||
typedef std::int32_t DimensionType;
|
||||
|
||||
typedef std::uint32_t SizeType;
|
||||
|
||||
const SizeType MaxSize = (std::numeric_limits<SizeType>::max)();
|
||||
const float MinDist = (std::numeric_limits<float>::min)();
|
||||
const float MaxDist = (std::numeric_limits<float>::max)();
|
||||
const float Epsilon = 0.000000001f;
|
||||
@ -76,11 +77,6 @@ public:
|
||||
#endif
|
||||
};
|
||||
|
||||
// Type of number index.
|
||||
typedef std::int32_t IndexType;
|
||||
static_assert(std::is_integral<IndexType>::value, "IndexType must be integral type.");
|
||||
|
||||
|
||||
enum class ErrorCode : std::uint16_t
|
||||
{
|
||||
#define DefineErrorCode(Name, Value) Name = Value,
|
||||
|
@ -24,34 +24,34 @@ namespace SPTAG
|
||||
// node type for storing BKT
|
||||
struct BKTNode
|
||||
{
|
||||
int centerid;
|
||||
int childStart;
|
||||
int childEnd;
|
||||
SizeType centerid;
|
||||
SizeType childStart;
|
||||
SizeType childEnd;
|
||||
|
||||
BKTNode(int cid = -1) : centerid(cid), childStart(-1), childEnd(-1) {}
|
||||
BKTNode(SizeType cid = -1) : centerid(cid), childStart(-1), childEnd(-1) {}
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
struct KmeansArgs {
|
||||
int _K;
|
||||
int _D;
|
||||
DimensionType _D;
|
||||
int _T;
|
||||
T* centers;
|
||||
int* counts;
|
||||
SizeType* counts;
|
||||
float* newCenters;
|
||||
int* newCounts;
|
||||
char* label;
|
||||
int* clusterIdx;
|
||||
SizeType* newCounts;
|
||||
int* label;
|
||||
SizeType* clusterIdx;
|
||||
float* clusterDist;
|
||||
T* newTCenters;
|
||||
|
||||
KmeansArgs(int k, int dim, int datasize, int threadnum) : _K(k), _D(dim), _T(threadnum) {
|
||||
KmeansArgs(int k, DimensionType dim, SizeType datasize, int threadnum) : _K(k), _D(dim), _T(threadnum) {
|
||||
centers = new T[k * dim];
|
||||
counts = new int[k];
|
||||
counts = new SizeType[k];
|
||||
newCenters = new float[threadnum * k * dim];
|
||||
newCounts = new int[threadnum * k];
|
||||
label = new char[datasize];
|
||||
clusterIdx = new int[threadnum * k];
|
||||
newCounts = new SizeType[threadnum * k];
|
||||
label = new int[datasize];
|
||||
clusterIdx = new SizeType[threadnum * k];
|
||||
clusterDist = new float[threadnum * k];
|
||||
newTCenters = new T[k * dim];
|
||||
}
|
||||
@ -68,7 +68,7 @@ namespace SPTAG
|
||||
}
|
||||
|
||||
inline void ClearCounts() {
|
||||
memset(newCounts, 0, sizeof(int) * _T * _K);
|
||||
memset(newCounts, 0, sizeof(SizeType) * _T * _K);
|
||||
}
|
||||
|
||||
inline void ClearCenters() {
|
||||
@ -82,17 +82,17 @@ namespace SPTAG
|
||||
}
|
||||
}
|
||||
|
||||
void Shuffle(std::vector<int>& indices, int first, int last) {
|
||||
int* pos = new int[_K];
|
||||
void Shuffle(std::vector<SizeType>& indices, SizeType first, SizeType last) {
|
||||
SizeType* pos = new SizeType[_K];
|
||||
pos[0] = first;
|
||||
for (int k = 1; k < _K; k++) pos[k] = pos[k - 1] + newCounts[k - 1];
|
||||
|
||||
for (int k = 0; k < _K; k++) {
|
||||
if (newCounts[k] == 0) continue;
|
||||
int i = pos[k];
|
||||
SizeType i = pos[k];
|
||||
while (newCounts[k] > 0) {
|
||||
int swapid = pos[(int)(label[i])] + newCounts[(int)(label[i])] - 1;
|
||||
newCounts[(int)(label[i])]--;
|
||||
SizeType swapid = pos[label[i]] + newCounts[label[i]] - 1;
|
||||
newCounts[label[i]]--;
|
||||
std::swap(indices[i], indices[swapid]);
|
||||
std::swap(label[i], label[swapid]);
|
||||
}
|
||||
@ -114,59 +114,59 @@ namespace SPTAG
|
||||
m_iSamples(other.m_iSamples) {}
|
||||
~BKTree() {}
|
||||
|
||||
inline const BKTNode& operator[](int index) const { return m_pTreeRoots[index]; }
|
||||
inline BKTNode& operator[](int index) { return m_pTreeRoots[index]; }
|
||||
inline const BKTNode& operator[](SizeType index) const { return m_pTreeRoots[index]; }
|
||||
inline BKTNode& operator[](SizeType index) { return m_pTreeRoots[index]; }
|
||||
|
||||
inline int size() const { return (int)m_pTreeRoots.size(); }
|
||||
inline SizeType size() const { return (SizeType)m_pTreeRoots.size(); }
|
||||
|
||||
inline const std::unordered_map<int, int>& GetSampleMap() const { return m_pSampleCenterMap; }
|
||||
inline const std::unordered_map<SizeType, SizeType>& GetSampleMap() const { return m_pSampleCenterMap; }
|
||||
|
||||
template <typename T>
|
||||
void BuildTrees(VectorIndex* index, std::vector<int>* indices = nullptr)
|
||||
void BuildTrees(VectorIndex* index, std::vector<SizeType>* indices = nullptr)
|
||||
{
|
||||
struct BKTStackItem {
|
||||
int index, first, last;
|
||||
BKTStackItem(int index_, int first_, int last_) : index(index_), first(first_), last(last_) {}
|
||||
SizeType index, first, last;
|
||||
BKTStackItem(SizeType index_, SizeType first_, SizeType last_) : index(index_), first(first_), last(last_) {}
|
||||
};
|
||||
std::stack<BKTStackItem> ss;
|
||||
|
||||
std::vector<int> localindices;
|
||||
std::vector<SizeType> localindices;
|
||||
if (indices == nullptr) {
|
||||
localindices.resize(index->GetNumSamples());
|
||||
for (int i = 0; i < index->GetNumSamples(); i++) localindices[i] = i;
|
||||
for (SizeType i = 0; i < index->GetNumSamples(); i++) localindices[i] = i;
|
||||
}
|
||||
else {
|
||||
localindices.assign(indices->begin(), indices->end());
|
||||
}
|
||||
KmeansArgs<T> args(m_iBKTKmeansK, index->GetFeatureDim(), (int)localindices.size(), omp_get_num_threads());
|
||||
KmeansArgs<T> args(m_iBKTKmeansK, index->GetFeatureDim(), (SizeType)localindices.size(), omp_get_num_threads());
|
||||
|
||||
m_pSampleCenterMap.clear();
|
||||
for (char i = 0; i < m_iTreeNumber; i++)
|
||||
{
|
||||
std::random_shuffle(localindices.begin(), localindices.end());
|
||||
|
||||
m_pTreeStart.push_back((int)m_pTreeRoots.size());
|
||||
m_pTreeRoots.push_back(BKTNode((int)localindices.size()));
|
||||
m_pTreeStart.push_back((SizeType)m_pTreeRoots.size());
|
||||
m_pTreeRoots.push_back(BKTNode((SizeType)localindices.size()));
|
||||
std::cout << "Start to build BKTree " << i + 1 << std::endl;
|
||||
|
||||
ss.push(BKTStackItem(m_pTreeStart[i], 0, (int)localindices.size()));
|
||||
ss.push(BKTStackItem(m_pTreeStart[i], 0, (SizeType)localindices.size()));
|
||||
while (!ss.empty()) {
|
||||
BKTStackItem item = ss.top(); ss.pop();
|
||||
int newBKTid = (int)m_pTreeRoots.size();
|
||||
SizeType newBKTid = (SizeType)m_pTreeRoots.size();
|
||||
m_pTreeRoots[item.index].childStart = newBKTid;
|
||||
if (item.last - item.first <= m_iBKTLeafSize) {
|
||||
for (int j = item.first; j < item.last; j++) {
|
||||
for (SizeType j = item.first; j < item.last; j++) {
|
||||
m_pTreeRoots.push_back(BKTNode(localindices[j]));
|
||||
}
|
||||
}
|
||||
else { // clustering the data into BKTKmeansK clusters
|
||||
int numClusters = KmeansClustering(index, localindices, item.first, item.last, args);
|
||||
if (numClusters <= 1) {
|
||||
int end = min(item.last + 1, (int)localindices.size());
|
||||
SizeType end = min(item.last + 1, (SizeType)localindices.size());
|
||||
std::sort(localindices.begin() + item.first, localindices.begin() + end);
|
||||
m_pTreeRoots[item.index].centerid = localindices[item.first];
|
||||
m_pTreeRoots[item.index].childStart = -m_pTreeRoots[item.index].childStart;
|
||||
for (int j = item.first + 1; j < end; j++) {
|
||||
for (SizeType j = item.first + 1; j < end; j++) {
|
||||
m_pTreeRoots.push_back(BKTNode(localindices[j]));
|
||||
m_pSampleCenterMap[localindices[j]] = m_pTreeRoots[item.index].centerid;
|
||||
}
|
||||
@ -181,53 +181,36 @@ namespace SPTAG
|
||||
}
|
||||
}
|
||||
}
|
||||
m_pTreeRoots[item.index].childEnd = (int)m_pTreeRoots.size();
|
||||
m_pTreeRoots[item.index].childEnd = (SizeType)m_pTreeRoots.size();
|
||||
}
|
||||
std::cout << i + 1 << " BKTree built, " << m_pTreeRoots.size() - m_pTreeStart[i] << " " << localindices.size() << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
bool SaveTrees(void **pKDTMemFile, int64_t &len) const
|
||||
inline std::uint64_t BufferSize() const
|
||||
{
|
||||
int treeNodeSize = (int)m_pTreeRoots.size();
|
||||
|
||||
size_t size = sizeof(int) +
|
||||
sizeof(int) * m_iTreeNumber +
|
||||
sizeof(int) +
|
||||
sizeof(BKTNode) * treeNodeSize;
|
||||
char *mem = (char*)malloc(size);
|
||||
if (mem == NULL) return false;
|
||||
|
||||
auto ptr = mem;
|
||||
*(int*)ptr = m_iTreeNumber;
|
||||
ptr += sizeof(int);
|
||||
|
||||
memcpy(ptr, m_pTreeStart.data(), sizeof(int) * m_iTreeNumber);
|
||||
ptr += sizeof(int) * m_iTreeNumber;
|
||||
|
||||
*(int*)ptr = treeNodeSize;
|
||||
ptr += sizeof(int);
|
||||
|
||||
memcpy(ptr, m_pTreeRoots.data(), sizeof(BKTNode) * treeNodeSize);
|
||||
*pKDTMemFile = mem;
|
||||
len = size;
|
||||
return sizeof(int) + sizeof(SizeType) * m_iTreeNumber +
|
||||
sizeof(SizeType) + sizeof(BKTNode) * m_pTreeRoots.size();
|
||||
}
|
||||
|
||||
bool SaveTrees(std::ostream& p_outstream) const
|
||||
{
|
||||
p_outstream.write((char*)&m_iTreeNumber, sizeof(int));
|
||||
p_outstream.write((char*)m_pTreeStart.data(), sizeof(SizeType) * m_iTreeNumber);
|
||||
SizeType treeNodeSize = (SizeType)m_pTreeRoots.size();
|
||||
p_outstream.write((char*)&treeNodeSize, sizeof(SizeType));
|
||||
p_outstream.write((char*)m_pTreeRoots.data(), sizeof(BKTNode) * treeNodeSize);
|
||||
std::cout << "Save BKT (" << m_iTreeNumber << "," << treeNodeSize << ") Finish!" << std::endl;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool SaveTrees(std::string sTreeFileName) const
|
||||
{
|
||||
std::cout << "Save BKT to " << sTreeFileName << std::endl;
|
||||
FILE *fp = fopen(sTreeFileName.c_str(), "wb");
|
||||
if (fp == NULL) return false;
|
||||
|
||||
fwrite(&m_iTreeNumber, sizeof(int), 1, fp);
|
||||
fwrite(m_pTreeStart.data(), sizeof(int), m_iTreeNumber, fp);
|
||||
int treeNodeSize = (int)m_pTreeRoots.size();
|
||||
fwrite(&treeNodeSize, sizeof(int), 1, fp);
|
||||
fwrite(m_pTreeRoots.data(), sizeof(BKTNode), treeNodeSize, fp);
|
||||
fclose(fp);
|
||||
std::cout << "Save BKT (" << m_iTreeNumber << "," << treeNodeSize << ") Finish!" << std::endl;
|
||||
std::ofstream output(sTreeFileName, std::ios::binary);
|
||||
if (!output.is_open()) return false;
|
||||
SaveTrees(output);
|
||||
output.close();
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -236,31 +219,32 @@ namespace SPTAG
|
||||
m_iTreeNumber = *((int*)pBKTMemFile);
|
||||
pBKTMemFile += sizeof(int);
|
||||
m_pTreeStart.resize(m_iTreeNumber);
|
||||
memcpy(m_pTreeStart.data(), pBKTMemFile, sizeof(int) * m_iTreeNumber);
|
||||
pBKTMemFile += sizeof(int)*m_iTreeNumber;
|
||||
memcpy(m_pTreeStart.data(), pBKTMemFile, sizeof(SizeType) * m_iTreeNumber);
|
||||
pBKTMemFile += sizeof(SizeType)*m_iTreeNumber;
|
||||
|
||||
int treeNodeSize = *((int*)pBKTMemFile);
|
||||
pBKTMemFile += sizeof(int);
|
||||
SizeType treeNodeSize = *((SizeType*)pBKTMemFile);
|
||||
pBKTMemFile += sizeof(SizeType);
|
||||
m_pTreeRoots.resize(treeNodeSize);
|
||||
memcpy(m_pTreeRoots.data(), pBKTMemFile, sizeof(BKTNode) * treeNodeSize);
|
||||
std::cout << "Load BKT (" << m_iTreeNumber << "," << treeNodeSize << ") Finish!" << std::endl;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool LoadTrees(std::string sTreeFileName)
|
||||
{
|
||||
std::cout << "Load BKT From " << sTreeFileName << std::endl;
|
||||
FILE *fp = fopen(sTreeFileName.c_str(), "rb");
|
||||
if (fp == NULL) return false;
|
||||
std::ifstream input(sTreeFileName, std::ios::binary);
|
||||
if (!input.is_open()) return false;
|
||||
|
||||
fread(&m_iTreeNumber, sizeof(int), 1, fp);
|
||||
input.read((char*)&m_iTreeNumber, sizeof(int));
|
||||
m_pTreeStart.resize(m_iTreeNumber);
|
||||
fread(m_pTreeStart.data(), sizeof(int), m_iTreeNumber, fp);
|
||||
input.read((char*)m_pTreeStart.data(), sizeof(SizeType) * m_iTreeNumber);
|
||||
|
||||
int treeNodeSize;
|
||||
fread(&treeNodeSize, sizeof(int), 1, fp);
|
||||
SizeType treeNodeSize;
|
||||
input.read((char*)&treeNodeSize, sizeof(SizeType));
|
||||
m_pTreeRoots.resize(treeNodeSize);
|
||||
fread(m_pTreeRoots.data(), sizeof(BKTNode), treeNodeSize, fp);
|
||||
fclose(fp);
|
||||
input.read((char*)m_pTreeRoots.data(), sizeof(BKTNode) * treeNodeSize);
|
||||
input.close();
|
||||
std::cout << "Load BKT (" << m_iTreeNumber << "," << treeNodeSize << ") Finish!" << std::endl;
|
||||
return true;
|
||||
}
|
||||
@ -274,9 +258,9 @@ namespace SPTAG
|
||||
p_space.m_SPTQueue.insert(COMMON::HeapCell(m_pTreeStart[i], p_index->ComputeDistance((const void*)p_query.GetTarget(), p_index->GetSample(node.centerid))));
|
||||
}
|
||||
else {
|
||||
for (int begin = node.childStart; begin < node.childEnd; begin++) {
|
||||
int index = m_pTreeRoots[begin].centerid;
|
||||
p_space.m_SPTQueue.insert(COMMON::HeapCell(begin, p_index->ComputeDistance((const void*)p_query.GetTarget(), p_index->GetSample(index))));
|
||||
for (SizeType begin = node.childStart; begin < node.childEnd; begin++) {
|
||||
SizeType index = m_pTreeRoots[begin].centerid;
|
||||
p_space.m_SPTQueue.insert(COMMON::HeapCell(begin, p_index->ComputeDistance((const void*)p_query.GetTarget(), p_index->GetSample(index))));
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -301,8 +285,8 @@ namespace SPTAG
|
||||
if (!p_space.CheckAndSet(tnode.centerid)) {
|
||||
p_space.m_NGQueue.insert(COMMON::HeapCell(tnode.centerid, bcell.distance));
|
||||
}
|
||||
for (int begin = tnode.childStart; begin < tnode.childEnd; begin++) {
|
||||
int index = m_pTreeRoots[begin].centerid;
|
||||
for (SizeType begin = tnode.childStart; begin < tnode.childEnd; begin++) {
|
||||
SizeType index = m_pTreeRoots[begin].centerid;
|
||||
p_space.m_SPTQueue.insert(COMMON::HeapCell(begin, p_index->ComputeDistance((const void*)p_query.GetTarget(), p_index->GetSample(index))));
|
||||
}
|
||||
}
|
||||
@ -313,24 +297,24 @@ namespace SPTAG
|
||||
|
||||
template <typename T>
|
||||
float KmeansAssign(VectorIndex* p_index,
|
||||
std::vector<int>& indices,
|
||||
const int first, const int last, KmeansArgs<T>& args, const bool updateCenters) const {
|
||||
std::vector<SizeType>& indices,
|
||||
const SizeType first, const SizeType last, KmeansArgs<T>& args, const bool updateCenters) const {
|
||||
float currDist = 0;
|
||||
int threads = omp_get_num_threads();
|
||||
float lambda = (updateCenters) ? COMMON::Utils::GetBase<T>() * COMMON::Utils::GetBase<T>() / (100.0f * (last - first)) : 0.0f;
|
||||
int subsize = (last - first - 1) / threads + 1;
|
||||
SizeType subsize = (last - first - 1) / threads + 1;
|
||||
|
||||
#pragma omp parallel for
|
||||
for (int tid = 0; tid < threads; tid++)
|
||||
{
|
||||
int istart = first + tid * subsize;
|
||||
int iend = min(first + (tid + 1) * subsize, last);
|
||||
int *inewCounts = args.newCounts + tid * m_iBKTKmeansK;
|
||||
SizeType istart = first + tid * subsize;
|
||||
SizeType iend = min(first + (tid + 1) * subsize, last);
|
||||
SizeType *inewCounts = args.newCounts + tid * m_iBKTKmeansK;
|
||||
float *inewCenters = args.newCenters + tid * m_iBKTKmeansK * p_index->GetFeatureDim();
|
||||
int * iclusterIdx = args.clusterIdx + tid * m_iBKTKmeansK;
|
||||
SizeType * iclusterIdx = args.clusterIdx + tid * m_iBKTKmeansK;
|
||||
float * iclusterDist = args.clusterDist + tid * m_iBKTKmeansK;
|
||||
float idist = 0;
|
||||
for (int i = istart; i < iend; i++) {
|
||||
for (SizeType i = istart; i < iend; i++) {
|
||||
int clusterid = 0;
|
||||
float smallestDist = MaxDist;
|
||||
for (int k = 0; k < m_iBKTKmeansK; k++) {
|
||||
@ -345,7 +329,7 @@ namespace SPTAG
|
||||
if (updateCenters) {
|
||||
const T* v = (const T*)p_index->GetSample(indices[i]);
|
||||
float* center = inewCenters + clusterid*p_index->GetFeatureDim();
|
||||
for (int j = 0; j < p_index->GetFeatureDim(); j++) center[j] += v[j];
|
||||
for (DimensionType j = 0; j < p_index->GetFeatureDim(); j++) center[j] += v[j];
|
||||
if (smallestDist > iclusterDist[clusterid]) {
|
||||
iclusterDist[clusterid] = smallestDist;
|
||||
iclusterIdx[clusterid] = indices[i];
|
||||
@ -369,36 +353,50 @@ namespace SPTAG
|
||||
if (updateCenters) {
|
||||
for (int i = 1; i < threads; i++) {
|
||||
float* currCenter = args.newCenters + i*m_iBKTKmeansK*p_index->GetFeatureDim();
|
||||
for (int j = 0; j < m_iBKTKmeansK * p_index->GetFeatureDim(); j++) args.newCenters[j] += currCenter[j];
|
||||
for (size_t j = 0; j < ((size_t)m_iBKTKmeansK) * p_index->GetFeatureDim(); j++) args.newCenters[j] += currCenter[j];
|
||||
|
||||
for (int k = 0; k < m_iBKTKmeansK; k++) {
|
||||
if (args.clusterIdx[i*m_iBKTKmeansK + k] != -1 && args.clusterDist[i*m_iBKTKmeansK + k] > args.clusterDist[k]) {
|
||||
args.clusterDist[k] = args.clusterDist[i*m_iBKTKmeansK + k];
|
||||
args.clusterIdx[k] = args.clusterIdx[i*m_iBKTKmeansK + k];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int maxcluster = 0;
|
||||
for (int k = 1; k < m_iBKTKmeansK; k++) if (args.newCounts[maxcluster] < args.newCounts[k]) maxcluster = k;
|
||||
|
||||
int maxid = maxcluster;
|
||||
for (int tid = 1; tid < threads; tid++) {
|
||||
if (args.clusterDist[maxid] < args.clusterDist[tid * m_iBKTKmeansK + maxcluster]) maxid = tid * m_iBKTKmeansK + maxcluster;
|
||||
int maxcluster = -1;
|
||||
SizeType maxCount = 0;
|
||||
for (int k = 0; k < m_iBKTKmeansK; k++) {
|
||||
if (args.newCounts[k] > maxCount && DistanceUtils::ComputeL2Distance((T*)p_index->GetSample(args.clusterIdx[k]), args.centers + k * p_index->GetFeatureDim(), p_index->GetFeatureDim()) > 1e-6)
|
||||
{
|
||||
maxcluster = k;
|
||||
maxCount = args.newCounts[k];
|
||||
}
|
||||
}
|
||||
if (args.clusterIdx[maxid] < 0 || args.clusterIdx[maxid] >= p_index->GetNumSamples())
|
||||
std::cout << "first:" << first << " last:" << last << " maxcluster:" << maxcluster << "(" << args.newCounts[maxcluster] << ") Error maxid:" << maxid << " dist:" << args.clusterDist[maxid] << std::endl;
|
||||
maxid = args.clusterIdx[maxid];
|
||||
|
||||
if (maxcluster != -1 && (args.clusterIdx[maxcluster] < 0 || args.clusterIdx[maxcluster] >= p_index->GetNumSamples()))
|
||||
std::cout << "first:" << first << " last:" << last << " maxcluster:" << maxcluster << "(" << args.newCounts[maxcluster] << ") Error dist:" << args.clusterDist[maxcluster] << std::endl;
|
||||
|
||||
for (int k = 0; k < m_iBKTKmeansK; k++) {
|
||||
T* TCenter = args.newTCenters + k * p_index->GetFeatureDim();
|
||||
if (args.newCounts[k] == 0) {
|
||||
//int nextid = Utils::rand_int(last, first);
|
||||
//while (args.label[nextid] != maxcluster) nextid = Utils::rand_int(last, first);
|
||||
int nextid = maxid;
|
||||
std::memcpy(TCenter, p_index->GetSample(nextid), sizeof(T)*p_index->GetFeatureDim());
|
||||
if (maxcluster != -1) {
|
||||
//int nextid = Utils::rand_int(last, first);
|
||||
//while (args.label[nextid] != maxcluster) nextid = Utils::rand_int(last, first);
|
||||
SizeType nextid = args.clusterIdx[maxcluster];
|
||||
std::memcpy(TCenter, p_index->GetSample(nextid), sizeof(T)*p_index->GetFeatureDim());
|
||||
}
|
||||
else {
|
||||
std::memcpy(TCenter, args.centers + k * p_index->GetFeatureDim(), sizeof(T)*p_index->GetFeatureDim());
|
||||
}
|
||||
}
|
||||
else {
|
||||
float* currCenters = args.newCenters + k * p_index->GetFeatureDim();
|
||||
for (int j = 0; j < p_index->GetFeatureDim(); j++) currCenters[j] /= args.newCounts[k];
|
||||
for (DimensionType j = 0; j < p_index->GetFeatureDim(); j++) currCenters[j] /= args.newCounts[k];
|
||||
|
||||
if (p_index->GetDistCalcMethod() == DistCalcMethod::Cosine) {
|
||||
COMMON::Utils::Normalize(currCenters, p_index->GetFeatureDim(), COMMON::Utils::GetBase<T>());
|
||||
}
|
||||
for (int j = 0; j < p_index->GetFeatureDim(); j++) TCenter[j] = (T)(currCenters[j]);
|
||||
for (DimensionType j = 0; j < p_index->GetFeatureDim(); j++) TCenter[j] = (T)(currCenters[j]);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -417,14 +415,14 @@ namespace SPTAG
|
||||
|
||||
template <typename T>
|
||||
int KmeansClustering(VectorIndex* p_index,
|
||||
std::vector<int>& indices, const int first, const int last, KmeansArgs<T>& args) const {
|
||||
std::vector<SizeType>& indices, const SizeType first, const SizeType last, KmeansArgs<T>& args) const {
|
||||
int iterLimit = 100;
|
||||
|
||||
int batchEnd = min(first + m_iSamples, last);
|
||||
SizeType batchEnd = min(first + m_iSamples, last);
|
||||
float currDiff, currDist, minClusterDist = MaxDist;
|
||||
for (int numKmeans = 0; numKmeans < 3; numKmeans++) {
|
||||
for (int k = 0; k < m_iBKTKmeansK; k++) {
|
||||
int randid = COMMON::Utils::rand_int(last, first);
|
||||
SizeType randid = COMMON::Utils::rand(last, first);
|
||||
std::memcpy(args.centers + k*p_index->GetFeatureDim(), p_index->GetSample(indices[randid]), sizeof(T)*p_index->GetFeatureDim());
|
||||
}
|
||||
args.ClearCounts();
|
||||
@ -432,7 +430,7 @@ namespace SPTAG
|
||||
if (currDist < minClusterDist) {
|
||||
minClusterDist = currDist;
|
||||
memcpy(args.newTCenters, args.centers, sizeof(T)*m_iBKTKmeansK*p_index->GetFeatureDim());
|
||||
memcpy(args.counts, args.newCounts, sizeof(int) * m_iBKTKmeansK);
|
||||
memcpy(args.counts, args.newCounts, sizeof(SizeType) * m_iBKTKmeansK);
|
||||
}
|
||||
}
|
||||
|
||||
@ -446,7 +444,7 @@ namespace SPTAG
|
||||
args.ClearCounts();
|
||||
args.ClearDists(-MaxDist);
|
||||
currDist = KmeansAssign(p_index, indices, first, batchEnd, args, true);
|
||||
memcpy(args.counts, args.newCounts, sizeof(int)*m_iBKTKmeansK);
|
||||
memcpy(args.counts, args.newCounts, sizeof(SizeType) * m_iBKTKmeansK);
|
||||
|
||||
currDiff = 0;
|
||||
for (int k = 0; k < m_iBKTKmeansK; k++) {
|
||||
@ -466,7 +464,7 @@ namespace SPTAG
|
||||
args.ClearCounts();
|
||||
args.ClearDists(MaxDist);
|
||||
currDist = KmeansAssign(p_index, indices, first, last, args, false);
|
||||
memcpy(args.counts, args.newCounts, sizeof(int)*m_iBKTKmeansK);
|
||||
memcpy(args.counts, args.newCounts, sizeof(SizeType) * m_iBKTKmeansK);
|
||||
|
||||
int numClusters = 0;
|
||||
for (int i = 0; i < m_iBKTKmeansK; i++) if (args.counts[i] > 0) numClusters++;
|
||||
@ -480,9 +478,9 @@ namespace SPTAG
|
||||
}
|
||||
|
||||
private:
|
||||
std::vector<int> m_pTreeStart;
|
||||
std::vector<SizeType> m_pTreeStart;
|
||||
std::vector<BKTNode> m_pTreeRoots;
|
||||
std::unordered_map<int, int> m_pSampleCenterMap;
|
||||
std::unordered_map<SizeType, SizeType> m_pSampleCenterMap;
|
||||
|
||||
public:
|
||||
int m_iTreeNumber, m_iBKTKmeansK, m_iBKTLeafSize, m_iSamples;
|
||||
|
@ -36,9 +36,9 @@ namespace SPTAG
|
||||
{
|
||||
class Utils {
|
||||
public:
|
||||
static int rand_int(int high = RAND_MAX, int low = 0) // Generates a random int value.
|
||||
static SizeType rand(SizeType high = MaxSize, SizeType low = 0) // Generates a random int value.
|
||||
{
|
||||
return low + (int)(float(high - low)*(std::rand() / (RAND_MAX + 1.0)));
|
||||
return low + (SizeType)(float(high - low)*(std::rand() / (RAND_MAX + 1.0)));
|
||||
}
|
||||
|
||||
static inline float atomic_float_add(volatile float* ptr, const float operand)
|
||||
@ -61,11 +61,11 @@ namespace SPTAG
|
||||
}
|
||||
}
|
||||
|
||||
static double GetVector(char* cstr, const char* sep, std::vector<float>& arr, int& NumDim) {
|
||||
static double GetVector(char* cstr, const char* sep, std::vector<float>& arr, DimensionType& NumDim) {
|
||||
char* current;
|
||||
char* context = NULL;
|
||||
|
||||
int i = 0;
|
||||
DimensionType i = 0;
|
||||
double sum = 0;
|
||||
arr.clear();
|
||||
current = strtok_s(cstr, sep, &context);
|
||||
@ -90,23 +90,23 @@ namespace SPTAG
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
static void Normalize(T* arr, int col, int base) {
|
||||
static void Normalize(T* arr, DimensionType col, int base) {
|
||||
double vecLen = 0;
|
||||
for (int j = 0; j < col; j++) {
|
||||
for (DimensionType j = 0; j < col; j++) {
|
||||
double val = arr[j];
|
||||
vecLen += val * val;
|
||||
}
|
||||
vecLen = std::sqrt(vecLen);
|
||||
if (vecLen < 1e-6) {
|
||||
T val = (T)(1.0 / std::sqrt((double)col) * base);
|
||||
for (int j = 0; j < col; j++) arr[j] = val;
|
||||
for (DimensionType j = 0; j < col; j++) arr[j] = val;
|
||||
}
|
||||
else {
|
||||
for (int j = 0; j < col; j++) arr[j] = (T)(arr[j] / vecLen * base);
|
||||
for (DimensionType j = 0; j < col; j++) arr[j] = (T)(arr[j] / vecLen * base);
|
||||
}
|
||||
}
|
||||
|
||||
static size_t ProcessLine(std::string& currentLine, std::vector<float>& arr, int& D, int base, DistCalcMethod distCalcMethod) {
|
||||
static size_t ProcessLine(std::string& currentLine, std::vector<float>& arr, DimensionType& D, int base, DistCalcMethod distCalcMethod) {
|
||||
size_t index;
|
||||
double vecLen;
|
||||
if (currentLine.length() == 0 || (index = currentLine.find_last_of("\t")) == std::string::npos || (vecLen = GetVector(const_cast<char*>(currentLine.c_str() + index + 1), "|", arr, D)) < -1) {
|
||||
@ -121,10 +121,10 @@ namespace SPTAG
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
static void PrepareQuerys(std::ifstream& inStream, std::vector<std::string>& qString, std::vector<std::vector<T>>& Query, int& NumQuery, int& NumDim, DistCalcMethod distCalcMethod, int base) {
|
||||
static void PrepareQuerys(std::ifstream& inStream, std::vector<std::string>& qString, std::vector<std::vector<T>>& Query, SizeType& NumQuery, DimensionType& NumDim, DistCalcMethod distCalcMethod, int base) {
|
||||
std::string currentLine;
|
||||
std::vector<float> arr;
|
||||
int i = 0;
|
||||
SizeType i = 0;
|
||||
size_t index;
|
||||
while ((NumQuery < 0 || i < NumQuery) && !inStream.eof()) {
|
||||
std::getline(inStream, currentLine);
|
||||
@ -132,9 +132,9 @@ namespace SPTAG
|
||||
continue;
|
||||
}
|
||||
qString.push_back(currentLine.substr(0, index));
|
||||
if (Query.size() < i + 1) Query.push_back(std::vector<T>(NumDim, 0));
|
||||
if ((SizeType)Query.size() < i + 1) Query.push_back(std::vector<T>(NumDim, 0));
|
||||
|
||||
for (int j = 0; j < NumDim; j++) Query[i][j] = (T)arr[j];
|
||||
for (DimensionType j = 0; j < NumDim; j++) Query[i][j] = (T)arr[j];
|
||||
i++;
|
||||
}
|
||||
NumQuery = i;
|
||||
@ -149,12 +149,12 @@ namespace SPTAG
|
||||
return 1;
|
||||
}
|
||||
|
||||
static inline void AddNeighbor(int idx, float dist, int *neighbors, float *dists, int size)
|
||||
static inline void AddNeighbor(SizeType idx, float dist, SizeType *neighbors, float *dists, DimensionType size)
|
||||
{
|
||||
size--;
|
||||
if (dist < dists[size] || (dist == dists[size] && idx < neighbors[size]))
|
||||
{
|
||||
int nb;
|
||||
DimensionType nb;
|
||||
for (nb = 0; nb <= size && neighbors[nb] != idx; nb++);
|
||||
|
||||
if (nb > size)
|
||||
|
@ -13,158 +13,18 @@ namespace SPTAG
|
||||
{
|
||||
namespace COMMON
|
||||
{
|
||||
const int bufsize = 1024 * 1024 * 1024;
|
||||
const int bufsize = 1 << 30;
|
||||
|
||||
class DataUtils {
|
||||
public:
|
||||
template <typename T>
|
||||
static void ProcessTSVData(int id, int threadbase, std::uint64_t blocksize,
|
||||
std::string filename, std::string outfile, std::string outmetafile, std::string outmetaindexfile,
|
||||
std::atomic_int& numSamples, int& D, DistCalcMethod distCalcMethod) {
|
||||
std::ifstream inputStream(filename);
|
||||
if (!inputStream.is_open()) {
|
||||
std::cerr << "unable to open file " + filename << std::endl;
|
||||
throw MyException("unable to open file " + filename);
|
||||
exit(1);
|
||||
}
|
||||
std::ofstream outputStream, metaStream_out, metaStream_index;
|
||||
outputStream.open(outfile + std::to_string(id + threadbase), std::ofstream::binary);
|
||||
metaStream_out.open(outmetafile + std::to_string(id + threadbase), std::ofstream::binary);
|
||||
metaStream_index.open(outmetaindexfile + std::to_string(id + threadbase), std::ofstream::binary);
|
||||
if (!outputStream.is_open() || !metaStream_out.is_open() || !metaStream_index.is_open()) {
|
||||
std::cerr << "unable to open output file " << outfile << " " << outmetafile << " " << outmetaindexfile << std::endl;
|
||||
throw MyException("unable to open output files");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
std::vector<float> arr;
|
||||
std::vector<T> sample;
|
||||
|
||||
int base = 1;
|
||||
if (distCalcMethod == DistCalcMethod::Cosine) {
|
||||
base = Utils::GetBase<T>();
|
||||
}
|
||||
std::uint64_t writepos = 0;
|
||||
int sampleSize = 0;
|
||||
std::uint64_t totalread = 0;
|
||||
std::streamoff startpos = id * blocksize;
|
||||
|
||||
#ifndef _MSC_VER
|
||||
int enter_size = 1;
|
||||
#else
|
||||
int enter_size = 1;
|
||||
#endif
|
||||
std::string currentLine;
|
||||
size_t index;
|
||||
inputStream.seekg(startpos, std::ifstream::beg);
|
||||
if (id != 0) {
|
||||
std::getline(inputStream, currentLine);
|
||||
totalread += currentLine.length() + enter_size;
|
||||
}
|
||||
std::cout << "Begin thread " << id << " begin at:" << (startpos + totalread) << std::endl;
|
||||
while (!inputStream.eof() && totalread <= blocksize) {
|
||||
std::getline(inputStream, currentLine);
|
||||
if (currentLine.length() <= enter_size || (index = Utils::ProcessLine(currentLine, arr, D, base, distCalcMethod)) < 0) {
|
||||
totalread += currentLine.length() + enter_size;
|
||||
continue;
|
||||
}
|
||||
sample.resize(D);
|
||||
for (int j = 0; j < D; j++) sample[j] = (T)arr[j];
|
||||
|
||||
outputStream.write((char *)(sample.data()), sizeof(T)*D);
|
||||
metaStream_index.write((char *)&writepos, sizeof(std::uint64_t));
|
||||
metaStream_out.write(currentLine.c_str(), index);
|
||||
|
||||
writepos += index;
|
||||
sampleSize += 1;
|
||||
totalread += currentLine.length() + enter_size;
|
||||
}
|
||||
metaStream_index.write((char *)&writepos, sizeof(std::uint64_t));
|
||||
metaStream_index.write((char *)&sampleSize, sizeof(int));
|
||||
inputStream.close();
|
||||
outputStream.close();
|
||||
metaStream_out.close();
|
||||
metaStream_index.close();
|
||||
|
||||
numSamples.fetch_add(sampleSize);
|
||||
|
||||
std::cout << "Finish Thread[" << id << ", " << sampleSize << "] at:" << (startpos + totalread) << std::endl;
|
||||
}
|
||||
|
||||
static void MergeData(int threadbase, std::string outfile, std::string outmetafile, std::string outmetaindexfile,
|
||||
std::atomic_int& numSamples, int D) {
|
||||
std::ifstream inputStream;
|
||||
std::ofstream outputStream;
|
||||
char * buf = new char[bufsize];
|
||||
std::uint64_t * offsets;
|
||||
int partSamples;
|
||||
int metaSamples = 0;
|
||||
std::uint64_t lastoff = 0;
|
||||
|
||||
outputStream.open(outfile, std::ofstream::binary);
|
||||
outputStream.write((char *)&numSamples, sizeof(int));
|
||||
outputStream.write((char *)&D, sizeof(int));
|
||||
for (int i = 0; i < threadbase; i++) {
|
||||
std::string file = outfile + std::to_string(i);
|
||||
inputStream.open(file, std::ifstream::binary);
|
||||
while (!inputStream.eof()) {
|
||||
inputStream.read(buf, bufsize);
|
||||
outputStream.write(buf, inputStream.gcount());
|
||||
}
|
||||
inputStream.close();
|
||||
remove(file.c_str());
|
||||
}
|
||||
outputStream.close();
|
||||
|
||||
outputStream.open(outmetafile, std::ofstream::binary);
|
||||
for (int i = 0; i < threadbase; i++) {
|
||||
std::string file = outmetafile + std::to_string(i);
|
||||
inputStream.open(file, std::ifstream::binary);
|
||||
while (!inputStream.eof()) {
|
||||
inputStream.read(buf, bufsize);
|
||||
outputStream.write(buf, inputStream.gcount());
|
||||
}
|
||||
inputStream.close();
|
||||
remove(file.c_str());
|
||||
}
|
||||
outputStream.close();
|
||||
delete[] buf;
|
||||
|
||||
outputStream.open(outmetaindexfile, std::ofstream::binary);
|
||||
outputStream.write((char *)&numSamples, sizeof(int));
|
||||
for (int i = 0; i < threadbase; i++) {
|
||||
std::string file = outmetaindexfile + std::to_string(i);
|
||||
inputStream.open(file, std::ifstream::binary);
|
||||
|
||||
inputStream.seekg(-((long long)sizeof(int)), inputStream.end);
|
||||
inputStream.read((char *)&partSamples, sizeof(int));
|
||||
offsets = new std::uint64_t[partSamples + 1];
|
||||
|
||||
inputStream.seekg(0, inputStream.beg);
|
||||
inputStream.read((char *)offsets, sizeof(std::uint64_t)*(partSamples + 1));
|
||||
inputStream.close();
|
||||
remove(file.c_str());
|
||||
|
||||
for (int j = 0; j < partSamples + 1; j++)
|
||||
offsets[j] += lastoff;
|
||||
outputStream.write((char *)offsets, sizeof(std::uint64_t)*partSamples);
|
||||
|
||||
lastoff = offsets[partSamples];
|
||||
metaSamples += partSamples;
|
||||
delete[] offsets;
|
||||
}
|
||||
outputStream.write((char *)&lastoff, sizeof(std::uint64_t));
|
||||
outputStream.close();
|
||||
|
||||
std::cout << "numSamples:" << numSamples << " metaSamples:" << metaSamples << " D:" << D << std::endl;
|
||||
}
|
||||
|
||||
static bool MergeIndex(const std::string& p_vectorfile1, const std::string& p_metafile1, const std::string& p_metaindexfile1,
|
||||
const std::string& p_vectorfile2, const std::string& p_metafile2, const std::string& p_metaindexfile2) {
|
||||
std::ifstream inputStream1, inputStream2;
|
||||
std::ofstream outputStream;
|
||||
char * buf = new char[bufsize];
|
||||
int R1, R2, C1, C2;
|
||||
std::unique_ptr<char[]> bufferHolder(new char[bufsize]);
|
||||
char * buf = bufferHolder.get();
|
||||
SizeType R1, R2;
|
||||
DimensionType C1, C2;
|
||||
|
||||
#define MergeVector(inputStream, vectorFile, R, C) \
|
||||
inputStream.open(vectorFile, std::ifstream::binary); \
|
||||
@ -172,8 +32,8 @@ namespace SPTAG
|
||||
std::cout << "Cannot open vector file: " << vectorFile <<"!" << std::endl; \
|
||||
return false; \
|
||||
} \
|
||||
inputStream.read((char *)&(R), sizeof(int)); \
|
||||
inputStream.read((char *)&(C), sizeof(int)); \
|
||||
inputStream.read((char *)&(R), sizeof(SizeType)); \
|
||||
inputStream.read((char *)&(C), sizeof(DimensionType)); \
|
||||
|
||||
MergeVector(inputStream1, p_vectorfile1, R1, C1)
|
||||
MergeVector(inputStream2, p_vectorfile2, R2, C2)
|
||||
@ -185,8 +45,8 @@ namespace SPTAG
|
||||
}
|
||||
R1 += R2;
|
||||
outputStream.open(p_vectorfile1 + "_tmp", std::ofstream::binary);
|
||||
outputStream.write((char *)&R1, sizeof(int));
|
||||
outputStream.write((char *)&C1, sizeof(int));
|
||||
outputStream.write((char *)&R1, sizeof(SizeType));
|
||||
outputStream.write((char *)&C1, sizeof(DimensionType));
|
||||
while (!inputStream1.eof()) {
|
||||
inputStream1.read(buf, bufsize);
|
||||
outputStream.write(buf, inputStream1.gcount());
|
||||
@ -218,26 +78,22 @@ namespace SPTAG
|
||||
outputStream.close();
|
||||
delete[] buf;
|
||||
|
||||
|
||||
std::uint64_t * offsets;
|
||||
int partSamples;
|
||||
std::uint64_t * offsets = reinterpret_cast<std::uint64_t*>(buf);
|
||||
std::uint64_t lastoff = 0;
|
||||
outputStream.open(p_metaindexfile1 + "_tmp", std::ofstream::binary);
|
||||
outputStream.write((char *)&R1, sizeof(int));
|
||||
outputStream.write((char *)&R1, sizeof(SizeType));
|
||||
#define MergeMetaIndex(inputStream, metaIndexFile) \
|
||||
inputStream.open(metaIndexFile, std::ifstream::binary); \
|
||||
if (!inputStream.is_open()) { \
|
||||
std::cout << "Cannot open meta index file: " << metaIndexFile << "!" << std::endl; \
|
||||
return false; \
|
||||
} \
|
||||
inputStream.read((char *)&partSamples, sizeof(int)); \
|
||||
offsets = new std::uint64_t[partSamples + 1]; \
|
||||
inputStream.read((char *)offsets, sizeof(std::uint64_t)*(partSamples + 1)); \
|
||||
inputStream.read((char *)&R2, sizeof(SizeType)); \
|
||||
inputStream.read((char *)offsets, sizeof(std::uint64_t)*(R2 + 1)); \
|
||||
inputStream.close(); \
|
||||
for (int j = 0; j < partSamples + 1; j++) offsets[j] += lastoff; \
|
||||
outputStream.write((char *)offsets, sizeof(std::uint64_t)*partSamples); \
|
||||
lastoff = offsets[partSamples]; \
|
||||
delete[] offsets; \
|
||||
for (SizeType j = 0; j < R2 + 1; j++) offsets[j] += lastoff; \
|
||||
outputStream.write((char *)offsets, sizeof(std::uint64_t)*R2); \
|
||||
lastoff = offsets[R2]; \
|
||||
|
||||
MergeMetaIndex(inputStream1, p_metaindexfile1)
|
||||
MergeMetaIndex(inputStream2, p_metaindexfile2)
|
||||
@ -253,36 +109,6 @@ namespace SPTAG
|
||||
std::cout << "Merged -> numSamples:" << R1 << " D:" << C1 << std::endl;
|
||||
return true;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
static void ParseData(std::string filenames, std::string outfile, std::string outmetafile, std::string outmetaindexfile,
|
||||
int threadnum, DistCalcMethod distCalcMethod) {
|
||||
omp_set_num_threads(threadnum);
|
||||
|
||||
std::atomic_int numSamples = { 0 };
|
||||
int D = -1;
|
||||
|
||||
int threadbase = 0;
|
||||
std::vector<std::string> inputFileNames = Helper::StrUtils::SplitString(filenames, ",");
|
||||
for (std::string inputFileName : inputFileNames)
|
||||
{
|
||||
#ifndef _MSC_VER
|
||||
struct stat stat_buf;
|
||||
stat(inputFileName.c_str(), &stat_buf);
|
||||
#else
|
||||
struct _stat64 stat_buf;
|
||||
int res = _stat64(inputFileName.c_str(), &stat_buf);
|
||||
#endif
|
||||
std::uint64_t blocksize = (stat_buf.st_size + threadnum - 1) / threadnum;
|
||||
|
||||
#pragma omp parallel for
|
||||
for (int i = 0; i < threadnum; i++) {
|
||||
ProcessTSVData<T>(i, threadbase, blocksize, inputFileName, outfile, outmetafile, outmetaindexfile, numSamples, D, distCalcMethod);
|
||||
}
|
||||
threadbase += threadnum;
|
||||
}
|
||||
MergeData(threadbase, outfile, outmetafile, outmetaindexfile, numSamples, D);
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
|
@ -28,23 +28,31 @@ namespace SPTAG
|
||||
class Dataset
|
||||
{
|
||||
private:
|
||||
int rows;
|
||||
int cols;
|
||||
std::string name = "Data";
|
||||
SizeType rows = 0;
|
||||
DimensionType cols = 1;
|
||||
bool ownData = false;
|
||||
T* data = nullptr;
|
||||
std::vector<T> dataIncremental;
|
||||
|
||||
SizeType incRows = 0;
|
||||
std::vector<T*> incBlocks;
|
||||
static const SizeType rowsInBlock = 1024 * 1024;
|
||||
public:
|
||||
Dataset(): rows(0), cols(1) {}
|
||||
Dataset(int rows_, int cols_, T* data_ = nullptr, bool transferOnwership_ = true)
|
||||
Dataset()
|
||||
{
|
||||
incBlocks.reserve(MaxSize / rowsInBlock + 1);
|
||||
}
|
||||
Dataset(SizeType rows_, DimensionType cols_, T* data_ = nullptr, bool transferOnwership_ = true)
|
||||
{
|
||||
Initialize(rows_, cols_, data_, transferOnwership_);
|
||||
incBlocks.reserve(MaxSize / rowsInBlock + 1);
|
||||
}
|
||||
~Dataset()
|
||||
{
|
||||
if (ownData) aligned_free(data);
|
||||
for (T* ptr : incBlocks) aligned_free(ptr);
|
||||
incBlocks.clear();
|
||||
}
|
||||
void Initialize(int rows_, int cols_, T* data_ = nullptr, bool transferOnwership_ = true)
|
||||
void Initialize(SizeType rows_, DimensionType cols_, T* data_ = nullptr, bool transferOnwership_ = true)
|
||||
{
|
||||
rows = rows_;
|
||||
cols = cols_;
|
||||
@ -52,161 +60,166 @@ namespace SPTAG
|
||||
if (data_ == nullptr || !transferOnwership_)
|
||||
{
|
||||
ownData = true;
|
||||
data = (T*)aligned_malloc(sizeof(T) * rows * cols, ALIGN);
|
||||
if (data_ != nullptr) memcpy(data, data_, rows * cols * sizeof(T));
|
||||
else std::memset(data, -1, rows * cols * sizeof(T));
|
||||
data = (T*)aligned_malloc(((size_t)rows) * cols * sizeof(T), ALIGN);
|
||||
if (data_ != nullptr) memcpy(data, data_, ((size_t)rows) * cols * sizeof(T));
|
||||
else std::memset(data, -1, ((size_t)rows) * cols * sizeof(T));
|
||||
}
|
||||
}
|
||||
void SetR(int R_)
|
||||
void SetName(const std::string name_) { name = name_; }
|
||||
void SetR(SizeType R_)
|
||||
{
|
||||
if (R_ >= rows)
|
||||
dataIncremental.resize((R_ - rows) * cols);
|
||||
else
|
||||
incRows = R_ - rows;
|
||||
else
|
||||
{
|
||||
rows = R_;
|
||||
dataIncremental.clear();
|
||||
incRows = 0;
|
||||
}
|
||||
}
|
||||
inline int R() const { return (int)(rows + dataIncremental.size() / cols); }
|
||||
inline int C() const { return cols; }
|
||||
T* operator[](int index)
|
||||
inline SizeType R() const { return rows + incRows; }
|
||||
inline DimensionType C() const { return cols; }
|
||||
inline std::uint64_t BufferSize() const { return sizeof(SizeType) + sizeof(DimensionType) + sizeof(T) * R() * C(); }
|
||||
|
||||
inline const T* At(SizeType index) const
|
||||
{
|
||||
if (index >= rows) {
|
||||
return dataIncremental.data() + (size_t)(index - rows)*cols;
|
||||
SizeType incIndex = index - rows;
|
||||
return incBlocks[incIndex / rowsInBlock] + ((size_t)(incIndex % rowsInBlock)) * cols;
|
||||
}
|
||||
return data + (size_t)index*cols;
|
||||
return data + ((size_t)index) * cols;
|
||||
}
|
||||
|
||||
const T* operator[](int index) const
|
||||
T* operator[](SizeType index)
|
||||
{
|
||||
if (index >= rows) {
|
||||
return dataIncremental.data() + (size_t)(index - rows)*cols;
|
||||
}
|
||||
return data + (size_t)index*cols;
|
||||
return (T*)At(index);
|
||||
}
|
||||
|
||||
const T* operator[](SizeType index) const
|
||||
{
|
||||
return At(index);
|
||||
}
|
||||
|
||||
void AddBatch(const T* pData, int num)
|
||||
ErrorCode AddBatch(const T* pData, SizeType num)
|
||||
{
|
||||
dataIncremental.insert(dataIncremental.end(), pData, pData + num*cols);
|
||||
if (R() > MaxSize - num) return ErrorCode::MemoryOverFlow;
|
||||
|
||||
SizeType written = 0;
|
||||
while (written < num) {
|
||||
SizeType curBlockIdx = (incRows + written) / rowsInBlock;
|
||||
if (curBlockIdx >= (SizeType)incBlocks.size()) {
|
||||
T* newBlock = (T*)aligned_malloc(((size_t)rowsInBlock) * cols * sizeof(T), ALIGN);
|
||||
if (newBlock == nullptr) return ErrorCode::MemoryOverFlow;
|
||||
incBlocks.push_back(newBlock);
|
||||
}
|
||||
SizeType curBlockPos = (incRows + written) % rowsInBlock;
|
||||
SizeType toWrite = min(rowsInBlock - curBlockPos, num - written);
|
||||
std::memcpy(incBlocks[curBlockIdx] + ((size_t)curBlockPos) * cols, pData + ((size_t)written) * cols, ((size_t)toWrite) * cols * sizeof(T));
|
||||
written += toWrite;
|
||||
}
|
||||
incRows += written;
|
||||
return ErrorCode::Success;
|
||||
}
|
||||
|
||||
void AddBatch(int num)
|
||||
ErrorCode AddBatch(SizeType num)
|
||||
{
|
||||
dataIncremental.insert(dataIncremental.end(), (size_t)num*cols, T(-1));
|
||||
if (R() > MaxSize - num) return ErrorCode::MemoryOverFlow;
|
||||
|
||||
SizeType written = 0;
|
||||
while (written < num) {
|
||||
SizeType curBlockIdx = (incRows + written) / rowsInBlock;
|
||||
if (curBlockIdx >= (SizeType)incBlocks.size()) {
|
||||
T* newBlock = (T*)aligned_malloc(((size_t)rowsInBlock) * cols * sizeof(T), ALIGN);
|
||||
if (newBlock == nullptr) return ErrorCode::MemoryOverFlow;
|
||||
incBlocks.push_back(newBlock);
|
||||
}
|
||||
SizeType curBlockPos = (incRows + written) % rowsInBlock;
|
||||
SizeType toWrite = min(rowsInBlock - curBlockPos, num - written);
|
||||
std::memset(incBlocks[curBlockIdx] + ((size_t)curBlockPos) * cols, -1, ((size_t)toWrite) * cols * sizeof(T));
|
||||
written += toWrite;
|
||||
}
|
||||
incRows += written;
|
||||
return ErrorCode::Success;
|
||||
}
|
||||
|
||||
bool Save(std::string sDataPointsFileName)
|
||||
bool Save(std::ostream& p_outstream) const
|
||||
{
|
||||
std::cout << "Save Data To " << sDataPointsFileName << std::endl;
|
||||
FILE * fp = fopen(sDataPointsFileName.c_str(), "wb");
|
||||
if (fp == NULL) return false;
|
||||
SizeType CR = R();
|
||||
p_outstream.write((char*)&CR, sizeof(SizeType));
|
||||
p_outstream.write((char*)&cols, sizeof(DimensionType));
|
||||
p_outstream.write((char*)data, sizeof(T) * cols * rows);
|
||||
|
||||
int CR = R();
|
||||
fwrite(&CR, sizeof(int), 1, fp);
|
||||
fwrite(&cols, sizeof(int), 1, fp);
|
||||
SizeType blocks = incRows / rowsInBlock;
|
||||
for (int i = 0; i < blocks; i++)
|
||||
p_outstream.write((char*)incBlocks[i], sizeof(T) * cols * rowsInBlock);
|
||||
|
||||
T* ptr = data;
|
||||
int toWrite = rows;
|
||||
while (toWrite > 0)
|
||||
{
|
||||
size_t write = fwrite(ptr, sizeof(T) * cols, toWrite, fp);
|
||||
ptr += write * cols;
|
||||
toWrite -= (int)write;
|
||||
}
|
||||
ptr = dataIncremental.data();
|
||||
toWrite = CR - rows;
|
||||
while (toWrite > 0)
|
||||
{
|
||||
size_t write = fwrite(ptr, sizeof(T) * cols, toWrite, fp);
|
||||
ptr += write * cols;
|
||||
toWrite -= (int)write;
|
||||
}
|
||||
fclose(fp);
|
||||
|
||||
std::cout << "Save Data (" << CR << ", " << cols << ") Finish!" << std::endl;
|
||||
SizeType remain = incRows % rowsInBlock;
|
||||
if (remain > 0) p_outstream.write((char*)incBlocks[blocks], sizeof(T) * cols * remain);
|
||||
std::cout << "Save " << name << " (" << CR << ", " << cols << ") Finish!" << std::endl;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Save(void **pDataPointsMemFile, int64_t &len)
|
||||
bool Save(std::string sDataPointsFileName) const
|
||||
{
|
||||
size_t size = sizeof(int) + sizeof(int) + sizeof(T) * R() *cols;
|
||||
char *mem = (char*)malloc(size);
|
||||
if (mem == NULL) return false;
|
||||
|
||||
int CR = R();
|
||||
|
||||
auto header = (int*)mem;
|
||||
header[0] = CR;
|
||||
header[1] = cols;
|
||||
auto body = &mem[8];
|
||||
|
||||
memcpy(body, data, sizeof(T) * cols * rows);
|
||||
body += sizeof(T) * cols * rows;
|
||||
memcpy(body, dataIncremental.data(), sizeof(T) * cols * (CR - rows));
|
||||
body += sizeof(T) * cols * (CR - rows);
|
||||
|
||||
*pDataPointsMemFile = mem;
|
||||
len = size;
|
||||
|
||||
std::cout << "Save " << name << " To " << sDataPointsFileName << std::endl;
|
||||
std::ofstream output(sDataPointsFileName, std::ios::binary);
|
||||
if (!output.is_open()) return false;
|
||||
Save(output);
|
||||
output.close();
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Load(std::string sDataPointsFileName)
|
||||
{
|
||||
std::cout << "Load Data From " << sDataPointsFileName << std::endl;
|
||||
FILE * fp = fopen(sDataPointsFileName.c_str(), "rb");
|
||||
if (fp == NULL) return false;
|
||||
std::cout << "Load " << name << " From " << sDataPointsFileName << std::endl;
|
||||
std::ifstream input(sDataPointsFileName, std::ios::binary);
|
||||
if (!input.is_open()) return false;
|
||||
|
||||
int R, C;
|
||||
fread(&R, sizeof(int), 1, fp);
|
||||
fread(&C, sizeof(int), 1, fp);
|
||||
input.read((char*)&rows, sizeof(SizeType));
|
||||
input.read((char*)&cols, sizeof(DimensionType));
|
||||
|
||||
Initialize(R, C);
|
||||
T* ptr = data;
|
||||
while (R > 0) {
|
||||
size_t read = fread(ptr, sizeof(T) * C, R, fp);
|
||||
ptr += read * C;
|
||||
R -= (int)read;
|
||||
}
|
||||
fclose(fp);
|
||||
std::cout << "Load Data (" << rows << ", " << cols << ") Finish!" << std::endl;
|
||||
Initialize(rows, cols);
|
||||
input.read((char*)data, sizeof(T) * cols * rows);
|
||||
input.close();
|
||||
std::cout << "Load " << name << " (" << rows << ", " << cols << ") Finish!" << std::endl;
|
||||
return true;
|
||||
}
|
||||
|
||||
// Functions for loading models from memory mapped files
|
||||
bool Load(char* pDataPointsMemFile)
|
||||
{
|
||||
int R, C;
|
||||
R = *((int*)pDataPointsMemFile);
|
||||
pDataPointsMemFile += sizeof(int);
|
||||
SizeType R;
|
||||
DimensionType C;
|
||||
R = *((SizeType*)pDataPointsMemFile);
|
||||
pDataPointsMemFile += sizeof(SizeType);
|
||||
|
||||
C = *((int*)pDataPointsMemFile);
|
||||
pDataPointsMemFile += sizeof(int);
|
||||
C = *((DimensionType*)pDataPointsMemFile);
|
||||
pDataPointsMemFile += sizeof(DimensionType);
|
||||
|
||||
Initialize(R, C, (T*)pDataPointsMemFile);
|
||||
std::cout << "Load " << name << " (" << R << ", " << C << ") Finish!" << std::endl;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Refine(const std::vector<int>& indices, std::string sDataPointsFileName)
|
||||
bool Refine(const std::vector<SizeType>& indices, std::ostream& output)
|
||||
{
|
||||
std::cout << "Save Refine Data To " << sDataPointsFileName << std::endl;
|
||||
FILE * fp = fopen(sDataPointsFileName.c_str(), "wb");
|
||||
if (fp == NULL) return false;
|
||||
SizeType R = (SizeType)(indices.size());
|
||||
output.write((char*)&R, sizeof(SizeType));
|
||||
output.write((char*)&cols, sizeof(DimensionType));
|
||||
|
||||
int R = (int)(indices.size());
|
||||
fwrite(&R, sizeof(int), 1, fp);
|
||||
fwrite(&cols, sizeof(int), 1, fp);
|
||||
|
||||
// write point one by one in case for cache miss
|
||||
for (int i = 0; i < R; i++) {
|
||||
if (indices[i] < rows)
|
||||
fwrite(data + (size_t)indices[i] * cols, sizeof(T) * cols, 1, fp);
|
||||
else
|
||||
fwrite(dataIncremental.data() + (size_t)(indices[i] - rows) * cols, sizeof(T) * cols, 1, fp);
|
||||
for (SizeType i = 0; i < R; i++) {
|
||||
output.write((char*)At(indices[i]), sizeof(T) * cols);
|
||||
}
|
||||
fclose(fp);
|
||||
std::cout << "Save Refine " << name << " (" << R << ", " << cols << ") Finish!" << std::endl;
|
||||
return true;
|
||||
}
|
||||
|
||||
std::cout << "Save Refine Data (" << R << ", " << cols << ") Finish!" << std::endl;
|
||||
bool Refine(const std::vector<SizeType>& indices, std::string sDataPointsFileName)
|
||||
{
|
||||
std::cout << "Save Refine " << name << " To " << sDataPointsFileName << std::endl;
|
||||
std::ofstream output(sDataPointsFileName, std::ios::binary);
|
||||
if (!output.is_open()) return false;
|
||||
Refine(indices, output);
|
||||
output.close();
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
@ -199,7 +199,7 @@ namespace SPTAG
|
||||
#endif
|
||||
/*
|
||||
template<typename T>
|
||||
static float ComputeL2Distance(const T *pX, const T *pY, int length)
|
||||
static float ComputeL2Distance(const T *pX, const T *pY, DimensionType length)
|
||||
{
|
||||
float diff = 0;
|
||||
const T* pEnd1 = pX + length;
|
||||
@ -217,7 +217,7 @@ namespace SPTAG
|
||||
result = acc(result, exec(c1, c2)); \
|
||||
} \
|
||||
|
||||
static float ComputeL2Distance(const std::int8_t *pX, const std::int8_t *pY, int length)
|
||||
static float ComputeL2Distance(const std::int8_t *pX, const std::int8_t *pY, DimensionType length)
|
||||
{
|
||||
const std::int8_t* pEnd32 = pX + ((length >> 5) << 5);
|
||||
const std::int8_t* pEnd16 = pX + ((length >> 4) << 4);
|
||||
@ -258,7 +258,7 @@ namespace SPTAG
|
||||
return diff;
|
||||
}
|
||||
|
||||
static float ComputeL2Distance(const std::uint8_t *pX, const std::uint8_t *pY, int length)
|
||||
static float ComputeL2Distance(const std::uint8_t *pX, const std::uint8_t *pY, DimensionType length)
|
||||
{
|
||||
const std::uint8_t* pEnd32 = pX + ((length >> 5) << 5);
|
||||
const std::uint8_t* pEnd16 = pX + ((length >> 4) << 4);
|
||||
@ -299,7 +299,7 @@ namespace SPTAG
|
||||
return diff;
|
||||
}
|
||||
|
||||
static float ComputeL2Distance(const std::int16_t *pX, const std::int16_t *pY, int length)
|
||||
static float ComputeL2Distance(const std::int16_t *pX, const std::int16_t *pY, DimensionType length)
|
||||
{
|
||||
const std::int16_t* pEnd16 = pX + ((length >> 4) << 4);
|
||||
const std::int16_t* pEnd8 = pX + ((length >> 3) << 3);
|
||||
@ -341,7 +341,7 @@ namespace SPTAG
|
||||
return diff;
|
||||
}
|
||||
|
||||
static float ComputeL2Distance(const float *pX, const float *pY, int length)
|
||||
static float ComputeL2Distance(const float *pX, const float *pY, DimensionType length)
|
||||
{
|
||||
const float* pEnd16 = pX + ((length >> 4) << 4);
|
||||
const float* pEnd4 = pX + ((length >> 2) << 2);
|
||||
@ -389,14 +389,14 @@ namespace SPTAG
|
||||
}
|
||||
/*
|
||||
template<typename T>
|
||||
static float ComputeCosineDistance(const T *pX, const T *pY, int length) {
|
||||
static float ComputeCosineDistance(const T *pX, const T *pY, DimensionType length) {
|
||||
float diff = 0;
|
||||
const T* pEnd1 = pX + length;
|
||||
while (pX < pEnd1) diff += (*pX++) * (*pY++);
|
||||
return 1 - diff;
|
||||
}
|
||||
*/
|
||||
static float ComputeCosineDistance(const std::int8_t *pX, const std::int8_t *pY, int length) {
|
||||
static float ComputeCosineDistance(const std::int8_t *pX, const std::int8_t *pY, DimensionType length) {
|
||||
const std::int8_t* pEnd32 = pX + ((length >> 5) << 5);
|
||||
const std::int8_t* pEnd16 = pX + ((length >> 4) << 4);
|
||||
const std::int8_t* pEnd4 = pX + ((length >> 2) << 2);
|
||||
@ -436,7 +436,7 @@ namespace SPTAG
|
||||
return 16129 - diff;
|
||||
}
|
||||
|
||||
static float ComputeCosineDistance(const std::uint8_t *pX, const std::uint8_t *pY, int length) {
|
||||
static float ComputeCosineDistance(const std::uint8_t *pX, const std::uint8_t *pY, DimensionType length) {
|
||||
const std::uint8_t* pEnd32 = pX + ((length >> 5) << 5);
|
||||
const std::uint8_t* pEnd16 = pX + ((length >> 4) << 4);
|
||||
const std::uint8_t* pEnd4 = pX + ((length >> 2) << 2);
|
||||
@ -476,7 +476,7 @@ namespace SPTAG
|
||||
return 65025 - diff;
|
||||
}
|
||||
|
||||
static float ComputeCosineDistance(const std::int16_t *pX, const std::int16_t *pY, int length) {
|
||||
static float ComputeCosineDistance(const std::int16_t *pX, const std::int16_t *pY, DimensionType length) {
|
||||
const std::int16_t* pEnd16 = pX + ((length >> 4) << 4);
|
||||
const std::int16_t* pEnd8 = pX + ((length >> 3) << 3);
|
||||
const std::int16_t* pEnd4 = pX + ((length >> 2) << 2);
|
||||
@ -517,7 +517,7 @@ namespace SPTAG
|
||||
return 1073676289 - diff;
|
||||
}
|
||||
|
||||
static float ComputeCosineDistance(const float *pX, const float *pY, int length) {
|
||||
static float ComputeCosineDistance(const float *pX, const float *pY, DimensionType length) {
|
||||
const float* pEnd16 = pX + ((length >> 4) << 4);
|
||||
const float* pEnd4 = pX + ((length >> 2) << 2);
|
||||
const float* pEnd1 = pX + length;
|
||||
@ -564,7 +564,7 @@ namespace SPTAG
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
static inline float ComputeDistance(const T *p1, const T *p2, int length, SPTAG::DistCalcMethod distCalcMethod)
|
||||
static inline float ComputeDistance(const T *p1, const T *p2, DimensionType length, SPTAG::DistCalcMethod distCalcMethod)
|
||||
{
|
||||
if (distCalcMethod == SPTAG::DistCalcMethod::L2)
|
||||
return ComputeL2Distance(p1, p2, length);
|
||||
@ -588,7 +588,7 @@ namespace SPTAG
|
||||
|
||||
|
||||
template<typename T>
|
||||
float (*DistanceCalcSelector(SPTAG::DistCalcMethod p_method)) (const T*, const T*, int)
|
||||
float (*DistanceCalcSelector(SPTAG::DistCalcMethod p_method)) (const T*, const T*, DimensionType)
|
||||
{
|
||||
switch (p_method)
|
||||
{
|
||||
|
@ -16,30 +16,30 @@ namespace SPTAG
|
||||
public:
|
||||
FineGrainedLock() {}
|
||||
~FineGrainedLock() {
|
||||
for (int i = 0; i < locks.size(); i++)
|
||||
for (size_t i = 0; i < locks.size(); i++)
|
||||
locks[i].reset();
|
||||
locks.clear();
|
||||
}
|
||||
|
||||
void resize(int n) {
|
||||
int current = (int)locks.size();
|
||||
void resize(SizeType n) {
|
||||
SizeType current = (SizeType)locks.size();
|
||||
if (current <= n) {
|
||||
locks.resize(n);
|
||||
for (int i = current; i < n; i++)
|
||||
for (SizeType i = current; i < n; i++)
|
||||
locks[i].reset(new std::mutex);
|
||||
}
|
||||
else {
|
||||
for (int i = n; i < current; i++)
|
||||
for (SizeType i = n; i < current; i++)
|
||||
locks[i].reset();
|
||||
locks.resize(n);
|
||||
}
|
||||
}
|
||||
|
||||
std::mutex& operator[](int idx) {
|
||||
std::mutex& operator[](SizeType idx) {
|
||||
return *locks[idx];
|
||||
}
|
||||
|
||||
const std::mutex& operator[](int idx) const {
|
||||
const std::mutex& operator[](SizeType idx) const {
|
||||
return *locks[idx];
|
||||
}
|
||||
private:
|
||||
|
@ -23,9 +23,9 @@ namespace SPTAG
|
||||
// node type for storing KDT
|
||||
struct KDTNode
|
||||
{
|
||||
int left;
|
||||
int right;
|
||||
short split_dim;
|
||||
SizeType left;
|
||||
SizeType right;
|
||||
DimensionType split_dim;
|
||||
float split_value;
|
||||
};
|
||||
|
||||
@ -39,18 +39,18 @@ namespace SPTAG
|
||||
m_iSamples(other.m_iSamples) {}
|
||||
~KDTree() {}
|
||||
|
||||
inline const KDTNode& operator[](int index) const { return m_pTreeRoots[index]; }
|
||||
inline KDTNode& operator[](int index) { return m_pTreeRoots[index]; }
|
||||
inline const KDTNode& operator[](SizeType index) const { return m_pTreeRoots[index]; }
|
||||
inline KDTNode& operator[](SizeType index) { return m_pTreeRoots[index]; }
|
||||
|
||||
inline int size() const { return (int)m_pTreeRoots.size(); }
|
||||
inline SizeType size() const { return (SizeType)m_pTreeRoots.size(); }
|
||||
|
||||
template <typename T>
|
||||
void BuildTrees(VectorIndex* p_index, std::vector<int>* indices = nullptr)
|
||||
void BuildTrees(VectorIndex* p_index, std::vector<SizeType>* indices = nullptr)
|
||||
{
|
||||
std::vector<int> localindices;
|
||||
std::vector<SizeType> localindices;
|
||||
if (indices == nullptr) {
|
||||
localindices.resize(p_index->GetNumSamples());
|
||||
for (int i = 0; i < p_index->GetNumSamples(); i++) localindices[i] = i;
|
||||
for (SizeType i = 0; i < p_index->GetNumSamples(); i++) localindices[i] = i;
|
||||
}
|
||||
else {
|
||||
localindices.assign(indices->begin(), indices->end());
|
||||
@ -63,58 +63,41 @@ namespace SPTAG
|
||||
{
|
||||
Sleep(i * 100); std::srand(clock());
|
||||
|
||||
std::vector<int> pindices(localindices.begin(), localindices.end());
|
||||
std::vector<SizeType> pindices(localindices.begin(), localindices.end());
|
||||
std::random_shuffle(pindices.begin(), pindices.end());
|
||||
|
||||
m_pTreeStart[i] = i * (int)pindices.size();
|
||||
m_pTreeStart[i] = i * (SizeType)pindices.size();
|
||||
std::cout << "Start to build KDTree " << i + 1 << std::endl;
|
||||
int iTreeSize = m_pTreeStart[i];
|
||||
DivideTree<T>(p_index, pindices, 0, (int)pindices.size() - 1, m_pTreeStart[i], iTreeSize);
|
||||
SizeType iTreeSize = m_pTreeStart[i];
|
||||
DivideTree<T>(p_index, pindices, 0, (SizeType)pindices.size() - 1, m_pTreeStart[i], iTreeSize);
|
||||
std::cout << i + 1 << " KDTree built, " << iTreeSize - m_pTreeStart[i] << " " << pindices.size() << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
bool SaveTrees(void **pKDTMemFile, int64_t &len) const
|
||||
inline std::uint64_t BufferSize() const
|
||||
{
|
||||
return sizeof(int) + sizeof(SizeType) * m_iTreeNumber +
|
||||
sizeof(SizeType) + sizeof(KDTNode) * m_pTreeRoots.size();
|
||||
}
|
||||
|
||||
bool SaveTrees(std::ostream& p_outstream) const
|
||||
{
|
||||
int treeNodeSize = (int)m_pTreeRoots.size();
|
||||
|
||||
size_t size = sizeof(int) +
|
||||
sizeof(int) * m_iTreeNumber +
|
||||
sizeof(int) +
|
||||
sizeof(KDTNode) * treeNodeSize;
|
||||
char *mem = (char*)malloc(size);
|
||||
if (mem == NULL) return false;
|
||||
|
||||
auto ptr = mem;
|
||||
*(int*)ptr = m_iTreeNumber;
|
||||
ptr += sizeof(int);
|
||||
|
||||
memcpy(ptr, m_pTreeStart.data(), sizeof(int) * m_iTreeNumber);
|
||||
ptr += sizeof(int) * m_iTreeNumber;
|
||||
|
||||
*(int*)ptr = treeNodeSize;
|
||||
ptr += sizeof(int);
|
||||
|
||||
memcpy(ptr, m_pTreeRoots.data(), sizeof(KDTNode) * treeNodeSize);
|
||||
*pKDTMemFile = mem;
|
||||
len = size;
|
||||
|
||||
p_outstream.write((char*)&m_iTreeNumber, sizeof(int));
|
||||
p_outstream.write((char*)m_pTreeStart.data(), sizeof(SizeType) * m_iTreeNumber);
|
||||
SizeType treeNodeSize = (SizeType)m_pTreeRoots.size();
|
||||
p_outstream.write((char*)&treeNodeSize, sizeof(SizeType));
|
||||
p_outstream.write((char*)m_pTreeRoots.data(), sizeof(KDTNode) * treeNodeSize);
|
||||
std::cout << "Save KDT (" << m_iTreeNumber << "," << treeNodeSize << ") Finish!" << std::endl;
|
||||
return true;
|
||||
}
|
||||
|
||||
// Saves the KD-trees to the file named sTreeFileName; returns false when the
// file cannot be opened.
// NOTE(review): this span interleaves two bodies from a rendered diff. One
// uses FILE*/fwrite with int-sized fields; the other opens a binary
// std::ofstream and delegates to SaveTrees(std::ostream&).
bool SaveTrees(std::string sTreeFileName) const
|
||||
{
|
||||
std::cout << "Save KDT to " << sTreeFileName << std::endl;
|
||||
FILE *fp = fopen(sTreeFileName.c_str(), "wb");
|
||||
if (fp == NULL) return false;
|
||||
|
||||
fwrite(&m_iTreeNumber, sizeof(int), 1, fp);
|
||||
fwrite(m_pTreeStart.data(), sizeof(int), m_iTreeNumber, fp);
|
||||
int treeNodeSize = (int)m_pTreeRoots.size();
|
||||
fwrite(&treeNodeSize, sizeof(int), 1, fp);
|
||||
fwrite(m_pTreeRoots.data(), sizeof(KDTNode), treeNodeSize, fp);
|
||||
fclose(fp);
|
||||
std::cout << "Save KDT (" << m_iTreeNumber << "," << treeNodeSize << ") Finish!" << std::endl;
|
||||
std::ofstream output(sTreeFileName, std::ios::binary);
|
||||
if (!output.is_open()) return false;
|
||||
// NOTE(review): the bool returned by SaveTrees(output) is discarded; the function returns true regardless.
SaveTrees(output);
|
||||
output.close();
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -123,31 +106,32 @@ namespace SPTAG
|
||||
m_iTreeNumber = *((int*)pKDTMemFile);
|
||||
pKDTMemFile += sizeof(int);
|
||||
m_pTreeStart.resize(m_iTreeNumber);
|
||||
memcpy(m_pTreeStart.data(), pKDTMemFile, sizeof(int) * m_iTreeNumber);
|
||||
pKDTMemFile += sizeof(int)*m_iTreeNumber;
|
||||
memcpy(m_pTreeStart.data(), pKDTMemFile, sizeof(SizeType) * m_iTreeNumber);
|
||||
pKDTMemFile += sizeof(SizeType)*m_iTreeNumber;
|
||||
|
||||
int treeNodeSize = *((int*)pKDTMemFile);
|
||||
pKDTMemFile += sizeof(int);
|
||||
SizeType treeNodeSize = *((SizeType*)pKDTMemFile);
|
||||
pKDTMemFile += sizeof(SizeType);
|
||||
m_pTreeRoots.resize(treeNodeSize);
|
||||
memcpy(m_pTreeRoots.data(), pKDTMemFile, sizeof(KDTNode) * treeNodeSize);
|
||||
std::cout << "Load KDT (" << m_iTreeNumber << "," << treeNodeSize << ") Finish!" << std::endl;
|
||||
return true;
|
||||
}
|
||||
|
||||
// Loads the KD-trees from the file named sTreeFileName into m_iTreeNumber,
// m_pTreeStart and m_pTreeRoots; returns false when the file cannot be opened.
// NOTE(review): this span interleaves two bodies from a rendered diff. One
// reads with FILE*/fread using int-sized fields; the other reads from a binary
// std::ifstream using SizeType-sized offsets and node count.
bool LoadTrees(std::string sTreeFileName)
|
||||
{
|
||||
std::cout << "Load KDT From " << sTreeFileName << std::endl;
|
||||
FILE *fp = fopen(sTreeFileName.c_str(), "rb");
|
||||
if (fp == NULL) return false;
|
||||
std::ifstream input(sTreeFileName, std::ios::binary);
|
||||
if (!input.is_open()) return false;
|
||||
|
||||
// Tree count first; it sizes the start-offset array read next.
fread(&m_iTreeNumber, sizeof(int), 1, fp);
|
||||
input.read((char*)&m_iTreeNumber, sizeof(int));
|
||||
m_pTreeStart.resize(m_iTreeNumber);
|
||||
fread(m_pTreeStart.data(), sizeof(int), m_iTreeNumber, fp);
|
||||
input.read((char*)m_pTreeStart.data(), sizeof(SizeType) * m_iTreeNumber);
|
||||
|
||||
// Node count next; it sizes the node array read last.
int treeNodeSize;
|
||||
fread(&treeNodeSize, sizeof(int), 1, fp);
|
||||
SizeType treeNodeSize;
|
||||
input.read((char*)&treeNodeSize, sizeof(SizeType));
|
||||
m_pTreeRoots.resize(treeNodeSize);
|
||||
fread(m_pTreeRoots.data(), sizeof(KDTNode), treeNodeSize, fp);
|
||||
fclose(fp);
|
||||
input.read((char*)m_pTreeRoots.data(), sizeof(KDTNode) * treeNodeSize);
|
||||
input.close();
|
||||
std::cout << "Load KDT (" << m_iTreeNumber << "," << treeNodeSize << ") Finish!" << std::endl;
|
||||
// NOTE(review): none of the reads is checked for success or short counts before true is returned.
return true;
|
||||
}
|
||||
@ -155,7 +139,7 @@ namespace SPTAG
|
||||
template <typename T>
|
||||
void InitSearchTrees(const VectorIndex* p_index, const COMMON::QueryResultSet<T> &p_query, COMMON::WorkSpace &p_space, const int p_limits) const
|
||||
{
|
||||
for (char i = 0; i < m_iTreeNumber; i++) {
|
||||
for (int i = 0; i < m_iTreeNumber; i++) {
|
||||
KDTSearch(p_index, p_query, p_space, m_pTreeStart[i], true, 0);
|
||||
}
|
||||
|
||||
@ -181,10 +165,10 @@ namespace SPTAG
|
||||
|
||||
template <typename T>
|
||||
void KDTSearch(const VectorIndex* p_index, const COMMON::QueryResultSet<T> &p_query,
|
||||
COMMON::WorkSpace& p_space, const int node, const bool isInit, const float distBound) const {
|
||||
COMMON::WorkSpace& p_space, const SizeType node, const bool isInit, const float distBound) const {
|
||||
if (node < 0)
|
||||
{
|
||||
int index = -node - 1;
|
||||
SizeType index = -node - 1;
|
||||
if (index >= p_index->GetNumSamples()) return;
|
||||
#ifdef PREFETCH
|
||||
const char* data = (const char *)(p_index->GetSample(index));
|
||||
@ -203,7 +187,7 @@ namespace SPTAG
|
||||
|
||||
float diff = (p_query.GetTarget())[tnode.split_dim] - tnode.split_value;
|
||||
float distanceBound = distBound + diff * diff;
|
||||
int otherChild, bestChild;
|
||||
SizeType otherChild, bestChild;
|
||||
if (diff < 0)
|
||||
{
|
||||
bestChild = tnode.left;
|
||||
@ -224,10 +208,10 @@ namespace SPTAG
|
||||
|
||||
|
||||
template <typename T>
|
||||
void DivideTree(VectorIndex* p_index, std::vector<int>& indices, int first, int last,
|
||||
int index, int &iTreeSize) {
|
||||
void DivideTree(VectorIndex* p_index, std::vector<SizeType>& indices, SizeType first, SizeType last,
|
||||
SizeType index, SizeType &iTreeSize) {
|
||||
ChooseDivision<T>(p_index, m_pTreeRoots[index], indices, first, last);
|
||||
int i = Subdivide<T>(p_index, m_pTreeRoots[index], indices, first, last);
|
||||
SizeType i = Subdivide<T>(p_index, m_pTreeRoots[index], indices, first, last);
|
||||
if (i - 1 <= first)
|
||||
{
|
||||
m_pTreeRoots[index].left = -indices[first] - 1;
|
||||
@ -251,30 +235,30 @@ namespace SPTAG
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void ChooseDivision(VectorIndex* p_index, KDTNode& node, const std::vector<int>& indices, const int first, const int last)
|
||||
void ChooseDivision(VectorIndex* p_index, KDTNode& node, const std::vector<SizeType>& indices, const SizeType first, const SizeType last)
|
||||
{
|
||||
std::vector<float> meanValues(p_index->GetFeatureDim(), 0);
|
||||
std::vector<float> varianceValues(p_index->GetFeatureDim(), 0);
|
||||
int end = min(first + m_iSamples, last);
|
||||
int count = end - first + 1;
|
||||
SizeType end = min(first + m_iSamples, last);
|
||||
SizeType count = end - first + 1;
|
||||
// calculate the mean of each dimension
|
||||
for (int j = first; j <= end; j++)
|
||||
for (SizeType j = first; j <= end; j++)
|
||||
{
|
||||
const T* v = (const T*)p_index->GetSample(indices[j]);
|
||||
for (int k = 0; k < p_index->GetFeatureDim(); k++)
|
||||
for (DimensionType k = 0; k < p_index->GetFeatureDim(); k++)
|
||||
{
|
||||
meanValues[k] += v[k];
|
||||
}
|
||||
}
|
||||
for (int k = 0; k < p_index->GetFeatureDim(); k++)
|
||||
for (DimensionType k = 0; k < p_index->GetFeatureDim(); k++)
|
||||
{
|
||||
meanValues[k] /= count;
|
||||
}
|
||||
// calculate the variance of each dimension
|
||||
for (int j = first; j <= end; j++)
|
||||
for (SizeType j = first; j <= end; j++)
|
||||
{
|
||||
const T* v = (const T*)p_index->GetSample(indices[j]);
|
||||
for (int k = 0; k < p_index->GetFeatureDim(); k++)
|
||||
for (DimensionType k = 0; k < p_index->GetFeatureDim(); k++)
|
||||
{
|
||||
float dist = v[k] - meanValues[k];
|
||||
varianceValues[k] += dist*dist;
|
||||
@ -286,13 +270,13 @@ namespace SPTAG
|
||||
node.split_value = meanValues[node.split_dim];
|
||||
}
|
||||
|
||||
int SelectDivisionDimension(const std::vector<float>& varianceValues) const
|
||||
DimensionType SelectDivisionDimension(const std::vector<float>& varianceValues) const
|
||||
{
|
||||
// Record the top maximum variances
|
||||
std::vector<int> topind(m_numTopDimensionKDTSplit);
|
||||
std::vector<DimensionType> topind(m_numTopDimensionKDTSplit);
|
||||
int num = 0;
|
||||
// order the variances
|
||||
for (int i = 0; i < varianceValues.size(); i++)
|
||||
for (DimensionType i = 0; i < (DimensionType)varianceValues.size(); i++)
|
||||
{
|
||||
if (num < m_numTopDimensionKDTSplit || varianceValues[i] > varianceValues[topind[num - 1]])
|
||||
{
|
||||
@ -314,18 +298,18 @@ namespace SPTAG
|
||||
}
|
||||
}
|
||||
// randomly choose a dimension from TOP_DIM
|
||||
return topind[COMMON::Utils::rand_int(num)];
|
||||
return topind[COMMON::Utils::rand(num)];
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
int Subdivide(VectorIndex* p_index, const KDTNode& node, std::vector<int>& indices, const int first, const int last) const
|
||||
SizeType Subdivide(VectorIndex* p_index, const KDTNode& node, std::vector<SizeType>& indices, const SizeType first, const SizeType last) const
|
||||
{
|
||||
int i = first;
|
||||
int j = last;
|
||||
SizeType i = first;
|
||||
SizeType j = last;
|
||||
// decide which child one point belongs
|
||||
while (i <= j)
|
||||
{
|
||||
int ind = indices[i];
|
||||
SizeType ind = indices[i];
|
||||
const T* v = (const T*)p_index->GetSample(ind);
|
||||
float val = v[node.split_dim];
|
||||
if (val < node.split_value)
|
||||
@ -347,7 +331,7 @@ namespace SPTAG
|
||||
}
|
||||
|
||||
private:
|
||||
std::vector<int> m_pTreeStart;
|
||||
std::vector<SizeType> m_pTreeStart;
|
||||
std::vector<KDTNode> m_pTreeRoots;
|
||||
|
||||
public:
|
||||
|
@ -27,18 +27,21 @@ namespace SPTAG
|
||||
m_iCEFScale(2),
|
||||
m_iRefineIter(0),
|
||||
m_iCEF(1000),
|
||||
m_iMaxCheckForRefineGraph(10000) {}
|
||||
m_iMaxCheckForRefineGraph(10000)
|
||||
{
|
||||
m_pNeighborhoodGraph.SetName("Graph");
|
||||
}
|
||||
|
||||
~NeighborhoodGraph() {}
|
||||
|
||||
virtual void InsertNeighbors(VectorIndex* index, const int node, int insertNode, float insertDist) = 0;
|
||||
virtual void InsertNeighbors(VectorIndex* index, const SizeType node, SizeType insertNode, float insertDist) = 0;
|
||||
|
||||
virtual void RebuildNeighbors(VectorIndex* index, const int node, int* nodes, const BasicResult* queryResults, const int numResults) = 0;
|
||||
virtual void RebuildNeighbors(VectorIndex* index, const SizeType node, SizeType* nodes, const BasicResult* queryResults, const int numResults) = 0;
|
||||
|
||||
virtual float GraphAccuracyEstimation(VectorIndex* index, const int samples, const std::unordered_map<int, int>* idmap = nullptr) = 0;
|
||||
virtual float GraphAccuracyEstimation(VectorIndex* index, const SizeType samples, const std::unordered_map<SizeType, SizeType>* idmap = nullptr) = 0;
|
||||
|
||||
template <typename T>
|
||||
void BuildGraph(VectorIndex* index, const std::unordered_map<int, int>* idmap = nullptr)
|
||||
void BuildGraph(VectorIndex* index, const std::unordered_map<SizeType, SizeType>* idmap = nullptr)
|
||||
{
|
||||
std::cout << "build RNG graph!" << std::endl;
|
||||
|
||||
@ -55,11 +58,11 @@ namespace SPTAG
|
||||
|
||||
{
|
||||
COMMON::Dataset<float> NeighborhoodDists(m_iGraphSize, m_iNeighborhoodSize);
|
||||
std::vector<std::vector<int>> TptreeDataIndices(m_iTPTNumber, std::vector<int>(m_iGraphSize));
|
||||
std::vector<std::vector<std::pair<int, int>>> TptreeLeafNodes(m_iTPTNumber, std::vector<std::pair<int, int>>());
|
||||
std::vector<std::vector<SizeType>> TptreeDataIndices(m_iTPTNumber, std::vector<SizeType>(m_iGraphSize));
|
||||
std::vector<std::vector<std::pair<SizeType, SizeType>>> TptreeLeafNodes(m_iTPTNumber, std::vector<std::pair<SizeType, SizeType>>());
|
||||
|
||||
for (int i = 0; i < m_iGraphSize; i++)
|
||||
for (int j = 0; j < m_iNeighborhoodSize; j++)
|
||||
for (SizeType i = 0; i < m_iGraphSize; i++)
|
||||
for (DimensionType j = 0; j < m_iNeighborhoodSize; j++)
|
||||
(NeighborhoodDists)[i][j] = MaxDist;
|
||||
|
||||
std::cout << "Parallel TpTree Partition begin " << std::endl;
|
||||
@ -67,7 +70,7 @@ namespace SPTAG
|
||||
for (int i = 0; i < m_iTPTNumber; i++)
|
||||
{
|
||||
Sleep(i * 100); std::srand(clock());
|
||||
for (int j = 0; j < m_iGraphSize; j++) TptreeDataIndices[i][j] = j;
|
||||
for (SizeType j = 0; j < m_iGraphSize; j++) TptreeDataIndices[i][j] = j;
|
||||
std::random_shuffle(TptreeDataIndices[i].begin(), TptreeDataIndices[i].end());
|
||||
PartitionByTptree<T>(index, TptreeDataIndices[i], 0, m_iGraphSize - 1, TptreeLeafNodes[i]);
|
||||
std::cout << "Finish Getting Leaves for Tree " << i << std::endl;
|
||||
@ -77,17 +80,17 @@ namespace SPTAG
|
||||
for (int i = 0; i < m_iTPTNumber; i++)
|
||||
{
|
||||
#pragma omp parallel for schedule(dynamic)
|
||||
for (int j = 0; j < TptreeLeafNodes[i].size(); j++)
|
||||
for (SizeType j = 0; j < (SizeType)TptreeLeafNodes[i].size(); j++)
|
||||
{
|
||||
int start_index = TptreeLeafNodes[i][j].first;
|
||||
int end_index = TptreeLeafNodes[i][j].second;
|
||||
SizeType start_index = TptreeLeafNodes[i][j].first;
|
||||
SizeType end_index = TptreeLeafNodes[i][j].second;
|
||||
if (omp_get_thread_num() == 0) std::cout << "\rProcessing Tree " << i << ' ' << j * 100 / TptreeLeafNodes[i].size() << '%';
|
||||
for (int x = start_index; x < end_index; x++)
|
||||
for (SizeType x = start_index; x < end_index; x++)
|
||||
{
|
||||
for (int y = x + 1; y <= end_index; y++)
|
||||
for (SizeType y = x + 1; y <= end_index; y++)
|
||||
{
|
||||
int p1 = TptreeDataIndices[i][x];
|
||||
int p2 = TptreeDataIndices[i][y];
|
||||
SizeType p1 = TptreeDataIndices[i][x];
|
||||
SizeType p2 = TptreeDataIndices[i][y];
|
||||
float dist = index->ComputeDistance(index->GetSample(p1), index->GetSample(p2));
|
||||
if (idmap != nullptr) {
|
||||
p1 = (idmap->find(p1) == idmap->end()) ? p1 : idmap->at(p1);
|
||||
@ -112,13 +115,13 @@ namespace SPTAG
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void RefineGraph(VectorIndex* index, const std::unordered_map<int, int>* idmap = nullptr)
|
||||
void RefineGraph(VectorIndex* index, const std::unordered_map<SizeType, SizeType>* idmap = nullptr)
|
||||
{
|
||||
m_iCEF *= m_iCEFScale;
|
||||
m_iMaxCheckForRefineGraph *= m_iCEFScale;
|
||||
|
||||
#pragma omp parallel for schedule(dynamic)
|
||||
for (int i = 0; i < m_iGraphSize; i++)
|
||||
for (SizeType i = 0; i < m_iGraphSize; i++)
|
||||
{
|
||||
RefineNode<T>(index, i, false);
|
||||
if (i % 1000 == 0) std::cout << "\rRefine 1 " << (i * 100 / m_iGraphSize) << "%";
|
||||
@ -130,7 +133,7 @@ namespace SPTAG
|
||||
m_iNeighborhoodSize /= m_iNeighborhoodScale;
|
||||
|
||||
#pragma omp parallel for schedule(dynamic)
|
||||
for (int i = 0; i < m_iGraphSize; i++)
|
||||
for (SizeType i = 0; i < m_iGraphSize; i++)
|
||||
{
|
||||
RefineNode<T>(index, i, false);
|
||||
if (i % 1000 == 0) std::cout << "\rRefine 2 " << (i * 100 / m_iGraphSize) << "%";
|
||||
@ -147,17 +150,17 @@ namespace SPTAG
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
ErrorCode RefineGraph(VectorIndex* index, std::vector<int>& indices, std::vector<int>& reverseIndices,
|
||||
std::string graphFileName, const std::unordered_map<int, int>* idmap = nullptr)
|
||||
ErrorCode RefineGraph(VectorIndex* index, std::vector<SizeType>& indices, std::vector<SizeType>& reverseIndices,
|
||||
std::ostream& output, const std::unordered_map<SizeType, SizeType>* idmap = nullptr)
|
||||
{
|
||||
int R = (int)indices.size();
|
||||
SizeType R = (SizeType)indices.size();
|
||||
|
||||
#pragma omp parallel for schedule(dynamic)
|
||||
for (int i = 0; i < R; i++)
|
||||
for (SizeType i = 0; i < R; i++)
|
||||
{
|
||||
RefineNode<T>(index, indices[i], false);
|
||||
int* nodes = m_pNeighborhoodGraph[indices[i]];
|
||||
for (int j = 0; j < m_iNeighborhoodSize; j++)
|
||||
SizeType* nodes = m_pNeighborhoodGraph[indices[i]];
|
||||
for (DimensionType j = 0; j < m_iNeighborhoodSize; j++)
|
||||
{
|
||||
if (nodes[j] < 0) nodes[j] = -1;
|
||||
else nodes[j] = reverseIndices[nodes[j]];
|
||||
@ -166,20 +169,13 @@ namespace SPTAG
|
||||
nodes[m_iNeighborhoodSize - 1] = -2 - idmap->at(-1 - indices[i]);
|
||||
}
|
||||
|
||||
std::ofstream graphOut(graphFileName, std::ios::binary);
|
||||
if (!graphOut.is_open()) return ErrorCode::FailedCreateFile;
|
||||
graphOut.write((char*)&R, sizeof(int));
|
||||
graphOut.write((char*)&m_iNeighborhoodSize, sizeof(int));
|
||||
for (int i = 0; i < R; i++) {
|
||||
graphOut.write((char*)m_pNeighborhoodGraph[indices[i]], sizeof(int) * m_iNeighborhoodSize);
|
||||
}
|
||||
graphOut.close();
|
||||
m_pNeighborhoodGraph.Refine(indices, output);
|
||||
return ErrorCode::Success;
|
||||
}
|
||||
|
||||
|
||||
template <typename T>
|
||||
void RefineNode(VectorIndex* index, const int node, bool updateNeighbors)
|
||||
void RefineNode(VectorIndex* index, const SizeType node, bool updateNeighbors)
|
||||
{
|
||||
COMMON::QueryResultSet<T> query((const T*)index->GetSample(node), m_iCEF + 1);
|
||||
index->SearchIndex(query);
|
||||
@ -200,8 +196,8 @@ namespace SPTAG
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void PartitionByTptree(VectorIndex* index, std::vector<int>& indices, const int first, const int last,
|
||||
std::vector<std::pair<int, int>> & leaves)
|
||||
void PartitionByTptree(VectorIndex* index, std::vector<SizeType>& indices, const SizeType first, const SizeType last,
|
||||
std::vector<std::pair<SizeType, SizeType>> & leaves)
|
||||
{
|
||||
if (last - first <= m_iTPTLeafSize)
|
||||
{
|
||||
@ -212,39 +208,39 @@ namespace SPTAG
|
||||
std::vector<float> Mean(index->GetFeatureDim(), 0);
|
||||
|
||||
int iIteration = 100;
|
||||
int end = min(first + m_iSamples, last);
|
||||
int count = end - first + 1;
|
||||
SizeType end = min(first + m_iSamples, last);
|
||||
SizeType count = end - first + 1;
|
||||
// calculate the mean of each dimension
|
||||
for (int j = first; j <= end; j++)
|
||||
for (SizeType j = first; j <= end; j++)
|
||||
{
|
||||
const T* v = (const T*)index->GetSample(indices[j]);
|
||||
for (int k = 0; k < index->GetFeatureDim(); k++)
|
||||
for (DimensionType k = 0; k < index->GetFeatureDim(); k++)
|
||||
{
|
||||
Mean[k] += v[k];
|
||||
}
|
||||
}
|
||||
for (int k = 0; k < index->GetFeatureDim(); k++)
|
||||
for (DimensionType k = 0; k < index->GetFeatureDim(); k++)
|
||||
{
|
||||
Mean[k] /= count;
|
||||
}
|
||||
std::vector<BasicResult> Variance;
|
||||
Variance.reserve(index->GetFeatureDim());
|
||||
for (int j = 0; j < index->GetFeatureDim(); j++)
|
||||
for (DimensionType j = 0; j < index->GetFeatureDim(); j++)
|
||||
{
|
||||
Variance.push_back(BasicResult(j, 0));
|
||||
}
|
||||
// calculate the variance of each dimension
|
||||
for (int j = first; j <= end; j++)
|
||||
for (SizeType j = first; j <= end; j++)
|
||||
{
|
||||
const T* v = (const T*)index->GetSample(indices[j]);
|
||||
for (int k = 0; k < index->GetFeatureDim(); k++)
|
||||
for (DimensionType k = 0; k < index->GetFeatureDim(); k++)
|
||||
{
|
||||
float dist = v[k] - Mean[k];
|
||||
Variance[k].Dist += dist*dist;
|
||||
}
|
||||
}
|
||||
std::sort(Variance.begin(), Variance.end(), COMMON::Compare);
|
||||
std::vector<int> indexs(m_numTopDimensionTPTSplit);
|
||||
std::vector<SizeType> indexs(m_numTopDimensionTPTSplit);
|
||||
std::vector<float> weight(m_numTopDimensionTPTSplit), bestweight(m_numTopDimensionTPTSplit);
|
||||
float bestvariance = Variance[index->GetFeatureDim() - 1].Dist;
|
||||
for (int i = 0; i < m_numTopDimensionTPTSplit; i++)
|
||||
@ -270,7 +266,7 @@ namespace SPTAG
|
||||
weight[j] /= sumweight;
|
||||
}
|
||||
float mean = 0;
|
||||
for (int j = 0; j < count; j++)
|
||||
for (SizeType j = 0; j < count; j++)
|
||||
{
|
||||
Val[j] = 0;
|
||||
const T* v = (const T*)index->GetSample(indices[first + j]);
|
||||
@ -282,7 +278,7 @@ namespace SPTAG
|
||||
}
|
||||
mean /= count;
|
||||
float var = 0;
|
||||
for (int j = 0; j < count; j++)
|
||||
for (SizeType j = 0; j < count; j++)
|
||||
{
|
||||
float dist = Val[j] - mean;
|
||||
var += dist * dist;
|
||||
@ -297,8 +293,8 @@ namespace SPTAG
|
||||
}
|
||||
}
|
||||
}
|
||||
int i = first;
|
||||
int j = last;
|
||||
SizeType i = first;
|
||||
SizeType j = last;
|
||||
// decide which child one point belongs
|
||||
while (i <= j)
|
||||
{
|
||||
@ -336,100 +332,71 @@ namespace SPTAG
|
||||
}
|
||||
}
|
||||
|
||||
// Forwards to m_pNeighborhoodGraph.BufferSize() and returns its result unchanged.
inline std::uint64_t BufferSize() const
|
||||
{
|
||||
return m_pNeighborhoodGraph.BufferSize();
|
||||
}
|
||||
|
||||
// Loads the neighborhood graph from the file named sGraphFilename, then sizes
// m_dataUpdateLock to m_iGraphSize. Returns false when the load/open fails.
// NOTE(review): this span interleaves two bodies from a rendered diff. One
// reads the two int header fields and each row with FILE*/fread; the other
// delegates to m_pNeighborhoodGraph.Load() and copies R()/C() into
// m_iGraphSize/m_iNeighborhoodSize.
bool LoadGraph(std::string sGraphFilename)
|
||||
{
|
||||
std::cout << "Load Graph From " << sGraphFilename << std::endl;
|
||||
FILE * fp = fopen(sGraphFilename.c_str(), "rb");
|
||||
if (fp == NULL) return false;
|
||||
if (!m_pNeighborhoodGraph.Load(sGraphFilename)) return false;
|
||||
|
||||
fread(&m_iGraphSize, sizeof(int), 1, fp);
|
||||
fread(&m_iNeighborhoodSize, sizeof(int), 1, fp);
|
||||
m_pNeighborhoodGraph.Initialize(m_iGraphSize, m_iNeighborhoodSize);
|
||||
m_iGraphSize = m_pNeighborhoodGraph.R();
|
||||
m_iNeighborhoodSize = m_pNeighborhoodGraph.C();
|
||||
m_dataUpdateLock.resize(m_iGraphSize);
|
||||
|
||||
// FILE* variant: one fread per graph row, m_iNeighborhoodSize ints each.
for (int i = 0; i < m_iGraphSize; i++)
|
||||
{
|
||||
fread((m_pNeighborhoodGraph)[i], sizeof(int), m_iNeighborhoodSize, fp);
|
||||
}
|
||||
fclose(fp);
|
||||
std::cout << "Load Graph (" << m_iGraphSize << "," << m_iNeighborhoodSize << ") Finish!" << std::endl;
|
||||
return true;
|
||||
}
|
||||
|
||||
// Loads the neighborhood graph from the in-memory buffer pGraphMemFile, then
// sizes m_dataUpdateLock to m_iGraphSize. Always returns true.
// NOTE(review): this span interleaves two signatures and bodies from a
// rendered diff (LoadGraphFromMemory and LoadGraph). One parses two int header
// fields and passes the remaining pointer to Initialize(); the other delegates
// to m_pNeighborhoodGraph.Load() and copies R()/C().
bool LoadGraphFromMemory(char* pGraphMemFile)
|
||||
bool LoadGraph(char* pGraphMemFile)
|
||||
{
|
||||
m_iGraphSize = *((int*)pGraphMemFile);
|
||||
pGraphMemFile += sizeof(int);
|
||||
// NOTE(review): the result of Load(pGraphMemFile) is not checked here, unlike
// the file-name overload which tests Load()'s result — confirm whether this
// overload can fail.
m_pNeighborhoodGraph.Load(pGraphMemFile);
|
||||
|
||||
m_iNeighborhoodSize = *((int*)pGraphMemFile);
|
||||
pGraphMemFile += sizeof(int);
|
||||
|
||||
m_pNeighborhoodGraph.Initialize(m_iGraphSize, m_iNeighborhoodSize, (int*)pGraphMemFile);
|
||||
m_iGraphSize = m_pNeighborhoodGraph.R();
|
||||
m_iNeighborhoodSize = m_pNeighborhoodGraph.C();
|
||||
m_dataUpdateLock.resize(m_iGraphSize);
|
||||
return true;
|
||||
}
|
||||
|
||||
// Saves the neighborhood graph to the file named sGraphFilename.
// NOTE(review): this span interleaves two bodies from a rendered diff. One
// writes the two int header fields and each row with FILE*/fwrite and returns
// true; the other is the single statement returning
// m_pNeighborhoodGraph.Save(sGraphFilename).
bool SaveGraph(std::string sGraphFilename) const
|
||||
{
|
||||
std::cout << "Save Graph To " << sGraphFilename << std::endl;
|
||||
FILE *fp = fopen(sGraphFilename.c_str(), "wb");
|
||||
if (fp == NULL) return false;
|
||||
|
||||
fwrite(&m_iGraphSize, sizeof(int), 1, fp);
|
||||
fwrite(&m_iNeighborhoodSize, sizeof(int), 1, fp);
|
||||
// FILE* variant: one fwrite per graph row, m_iNeighborhoodSize ints each.
for (int i = 0; i < m_iGraphSize; i++)
|
||||
{
|
||||
fwrite((m_pNeighborhoodGraph)[i], sizeof(int), m_iNeighborhoodSize, fp);
|
||||
}
|
||||
fclose(fp);
|
||||
std::cout << "Save Graph (" << m_iGraphSize << "," << m_iNeighborhoodSize << ") Finish!" << std::endl;
|
||||
return true;
|
||||
return m_pNeighborhoodGraph.Save(sGraphFilename);
|
||||
}
|
||||
|
||||
// NOTE(review): this span interleaves two definitions from a rendered diff.
// SaveGraphToMemory malloc's a buffer, fills it with two int header fields
// followed by every graph row, and hands it back through pGraphMemFile/len.
// SaveGraph(std::ostream&) returns m_pNeighborhoodGraph.Save(output).
bool SaveGraphToMemory(void **pGraphMemFile, int64_t &len) {
|
||||
// Header (two ints) plus m_iGraphSize rows of m_iNeighborhoodSize ints.
size_t size = sizeof(int) + sizeof(int) + sizeof(int) * m_iNeighborhoodSize * m_iGraphSize;
|
||||
char *mem = (char*)malloc(size);
|
||||
if (mem == NULL) return false;
|
||||
|
||||
auto ptr = mem;
|
||||
*(int*)ptr = m_iGraphSize;
|
||||
ptr += sizeof(int);
|
||||
|
||||
*(int*)ptr = m_iNeighborhoodSize;
|
||||
ptr += sizeof(int);
|
||||
|
||||
for (int i = 0; i < m_iGraphSize; i++)
|
||||
{
|
||||
memcpy(ptr, (m_pNeighborhoodGraph)[i], sizeof(int) * m_iNeighborhoodSize);
|
||||
ptr += sizeof(int) * m_iNeighborhoodSize;
|
||||
}
|
||||
// The malloc'ed buffer and its size are handed to the caller via the out-parameters.
*pGraphMemFile = mem;
|
||||
len = size;
|
||||
|
||||
return true;
|
||||
// Stream overload: forwards to the underlying dataset's Save().
bool SaveGraph(std::ostream& output) const
|
||||
{
|
||||
return m_pNeighborhoodGraph.Save(output);
|
||||
}
|
||||
|
||||
// Grows the graph by num rows: extends m_pNeighborhoodGraph, adds num to
// m_iGraphSize and resizes m_dataUpdateLock to the new size.
// NOTE(review): this span interleaves two versions from a rendered diff. The
// one-line void AddBatch(int) ignores any result from the dataset; the
// ErrorCode AddBatch(SizeType) returns early, before touching m_iGraphSize,
// when the dataset's AddBatch does not return ErrorCode::Success. The
// operator[] line in the middle is an unrelated accessor from the other side
// of the diff.
inline void AddBatch(int num) { m_pNeighborhoodGraph.AddBatch(num); m_iGraphSize += num; m_dataUpdateLock.resize(m_iGraphSize); }
|
||||
inline ErrorCode AddBatch(SizeType num)
|
||||
{
|
||||
ErrorCode ret = m_pNeighborhoodGraph.AddBatch(num);
|
||||
if (ret != ErrorCode::Success) return ret;
|
||||
|
||||
inline int* operator[](int index) { return m_pNeighborhoodGraph[index]; }
|
||||
m_iGraphSize += num;
|
||||
m_dataUpdateLock.resize(m_iGraphSize);
|
||||
return ErrorCode::Success;
|
||||
}
|
||||
|
||||
// One-line accessors over the graph storage.
// NOTE(review): int-typed and SizeType-typed versions of the same members are
// interleaved here from a rendered diff.
// operator[] returns the row pointer m_pNeighborhoodGraph[index].
inline const int* operator[](int index) const { return m_pNeighborhoodGraph[index]; }
|
||||
inline SizeType* operator[](SizeType index) { return m_pNeighborhoodGraph[index]; }
|
||||
|
||||
// SetR sets the row count on the dataset, mirrors it into m_iGraphSize and resizes m_dataUpdateLock to match.
inline void SetR(int rows) { m_pNeighborhoodGraph.SetR(rows); m_iGraphSize = rows; m_dataUpdateLock.resize(m_iGraphSize); }
|
||||
inline const SizeType* operator[](SizeType index) const { return m_pNeighborhoodGraph[index]; }
|
||||
|
||||
// R returns the cached row count m_iGraphSize.
inline int R() const { return m_iGraphSize; }
|
||||
inline void SetR(SizeType rows) { m_pNeighborhoodGraph.SetR(rows); m_iGraphSize = rows; m_dataUpdateLock.resize(m_iGraphSize); }
|
||||
|
||||
inline SizeType R() const { return m_iGraphSize; }
|
||||
|
||||
static std::shared_ptr<NeighborhoodGraph> CreateInstance(std::string type);
|
||||
|
||||
protected:
|
||||
// Graph structure
|
||||
int m_iGraphSize;
|
||||
COMMON::Dataset<int> m_pNeighborhoodGraph;
|
||||
SizeType m_iGraphSize;
|
||||
COMMON::Dataset<SizeType> m_pNeighborhoodGraph;
|
||||
COMMON::FineGrainedLock m_dataUpdateLock; // protect one row of the graph
|
||||
|
||||
public:
|
||||
int m_iTPTNumber, m_iTPTLeafSize, m_iSamples, m_numTopDimensionTPTSplit;
|
||||
int m_iNeighborhoodSize, m_iNeighborhoodScale, m_iCEFScale, m_iRefineIter, m_iCEF, m_iMaxCheckForRefineGraph;
|
||||
DimensionType m_iNeighborhoodSize;
|
||||
int m_iNeighborhoodScale, m_iCEFScale, m_iRefineIter, m_iCEF, m_iMaxCheckForRefineGraph;
|
||||
};
|
||||
}
|
||||
}
|
||||
|
@ -51,7 +51,7 @@ public:
|
||||
return m_results[0].Dist;
|
||||
}
|
||||
|
||||
bool AddPoint(const int index, float dist)
|
||||
bool AddPoint(const SizeType index, float dist)
|
||||
{
|
||||
if (dist < m_results[0].Dist || (dist == m_results[0].Dist && index < m_results[0].VID))
|
||||
{
|
||||
|
@ -13,15 +13,15 @@ namespace SPTAG
|
||||
class RelativeNeighborhoodGraph: public NeighborhoodGraph
|
||||
{
|
||||
public:
|
||||
void RebuildNeighbors(VectorIndex* index, const int node, int* nodes, const BasicResult* queryResults, const int numResults) {
|
||||
int count = 0;
|
||||
void RebuildNeighbors(VectorIndex* index, const SizeType node, SizeType* nodes, const BasicResult* queryResults, const int numResults) {
|
||||
DimensionType count = 0;
|
||||
for (int j = 0; j < numResults && count < m_iNeighborhoodSize; j++) {
|
||||
const BasicResult& item = queryResults[j];
|
||||
if (item.VID < 0) break;
|
||||
if (item.VID == node) continue;
|
||||
|
||||
bool good = true;
|
||||
for (int k = 0; k < count; k++) {
|
||||
for (DimensionType k = 0; k < count; k++) {
|
||||
if (index->ComputeDistance(index->GetSample(nodes[k]), index->GetSample(item.VID)) <= item.Dist) {
|
||||
good = false;
|
||||
break;
|
||||
@ -29,21 +29,21 @@ namespace SPTAG
|
||||
}
|
||||
if (good) nodes[count++] = item.VID;
|
||||
}
|
||||
for (int j = count; j < m_iNeighborhoodSize; j++) nodes[j] = -1;
|
||||
for (DimensionType j = count; j < m_iNeighborhoodSize; j++) nodes[j] = -1;
|
||||
}
|
||||
|
||||
void InsertNeighbors(VectorIndex* index, const int node, int insertNode, float insertDist)
|
||||
void InsertNeighbors(VectorIndex* index, const SizeType node, SizeType insertNode, float insertDist)
|
||||
{
|
||||
int* nodes = m_pNeighborhoodGraph[node];
|
||||
for (int k = 0; k < m_iNeighborhoodSize; k++)
|
||||
SizeType* nodes = m_pNeighborhoodGraph[node];
|
||||
for (DimensionType k = 0; k < m_iNeighborhoodSize; k++)
|
||||
{
|
||||
int tmpNode = nodes[k];
|
||||
SizeType tmpNode = nodes[k];
|
||||
if (tmpNode < -1) continue;
|
||||
|
||||
if (tmpNode < 0)
|
||||
{
|
||||
bool good = true;
|
||||
for (int t = 0; t < k; t++) {
|
||||
for (DimensionType t = 0; t < k; t++) {
|
||||
if (index->ComputeDistance(index->GetSample(insertNode), index->GetSample(nodes[t])) < insertDist) {
|
||||
good = false;
|
||||
break;
|
||||
@ -58,7 +58,7 @@ namespace SPTAG
|
||||
if (insertDist < tmpDist || (insertDist == tmpDist && insertNode < tmpNode))
|
||||
{
|
||||
bool good = true;
|
||||
for (int t = 0; t < k; t++) {
|
||||
for (DimensionType t = 0; t < k; t++) {
|
||||
if (index->ComputeDistance(index->GetSample(insertNode), index->GetSample(nodes[t])) < insertDist) {
|
||||
good = false;
|
||||
break;
|
||||
@ -76,33 +76,33 @@ namespace SPTAG
|
||||
}
|
||||
}
|
||||
|
||||
float GraphAccuracyEstimation(VectorIndex* index, const int samples, const std::unordered_map<int, int>* idmap = nullptr)
|
||||
float GraphAccuracyEstimation(VectorIndex* index, const SizeType samples, const std::unordered_map<SizeType, SizeType>* idmap = nullptr)
|
||||
{
|
||||
int* correct = new int[samples];
|
||||
DimensionType* correct = new DimensionType[samples];
|
||||
|
||||
#pragma omp parallel for schedule(dynamic)
|
||||
for (int i = 0; i < samples; i++)
|
||||
for (SizeType i = 0; i < samples; i++)
|
||||
{
|
||||
int x = COMMON::Utils::rand_int(m_iGraphSize);
|
||||
SizeType x = COMMON::Utils::rand(m_iGraphSize);
|
||||
//int x = i;
|
||||
COMMON::QueryResultSet<void> query(nullptr, m_iCEF);
|
||||
for (int y = 0; y < m_iGraphSize; y++)
|
||||
for (SizeType y = 0; y < m_iGraphSize; y++)
|
||||
{
|
||||
if ((idmap != nullptr && idmap->find(y) != idmap->end())) continue;
|
||||
float dist = index->ComputeDistance(index->GetSample(x), index->GetSample(y));
|
||||
query.AddPoint(y, dist);
|
||||
}
|
||||
query.SortResult();
|
||||
int * exact_rng = new int[m_iNeighborhoodSize];
|
||||
SizeType * exact_rng = new SizeType[m_iNeighborhoodSize];
|
||||
RebuildNeighbors(index, x, exact_rng, query.GetResults(), m_iCEF);
|
||||
|
||||
correct[i] = 0;
|
||||
for (int j = 0; j < m_iNeighborhoodSize; j++) {
|
||||
for (DimensionType j = 0; j < m_iNeighborhoodSize; j++) {
|
||||
if (exact_rng[j] == -1) {
|
||||
correct[i] += m_iNeighborhoodSize - j;
|
||||
break;
|
||||
}
|
||||
for (int k = 0; k < m_iNeighborhoodSize; k++)
|
||||
for (DimensionType k = 0; k < m_iNeighborhoodSize; k++)
|
||||
if ((m_pNeighborhoodGraph)[x][k] == exact_rng[j]) {
|
||||
correct[i]++;
|
||||
break;
|
||||
@ -111,7 +111,7 @@ namespace SPTAG
|
||||
delete[] exact_rng;
|
||||
}
|
||||
float acc = 0;
|
||||
for (int i = 0; i < samples; i++) acc += float(correct[i]);
|
||||
for (SizeType i = 0; i < samples; i++) acc += float(correct[i]);
|
||||
acc = acc / samples / m_iNeighborhoodSize;
|
||||
delete[] correct;
|
||||
return acc;
|
||||
|
@ -14,10 +14,10 @@ namespace SPTAG
|
||||
// node type in the priority queue
|
||||
struct HeapCell
|
||||
{
|
||||
int node;
|
||||
SizeType node;
|
||||
float distance;
|
||||
|
||||
HeapCell(int _node = -1, float _distance = MaxDist) : node(_node), distance(_distance) {}
|
||||
HeapCell(SizeType _node = -1, float _distance = MaxDist) : node(_node), distance(_distance) {}
|
||||
|
||||
inline bool operator < (const HeapCell& rhs)
|
||||
{
|
||||
@ -45,12 +45,12 @@ namespace SPTAG
|
||||
// Record 2 hash tables.
|
||||
// [0~m_poolSize + 1) is the first block.
|
||||
// [m_poolSize + 1, 2*(m_poolSize + 1)) is the second block;
|
||||
int m_hashTable[(m_poolSize + 1) * 2];
|
||||
SizeType m_hashTable[(m_poolSize + 1) * 2];
|
||||
|
||||
|
||||
// Returns (idx + loop) masked with m_poolSize.
// NOTE(review): two versions are interleaved here from a rendered diff. One
// takes int idx and casts it to unsigned inside the expression; the other
// takes unsigned idx directly.
inline unsigned hash_func2(int idx, int loop)
|
||||
inline unsigned hash_func2(unsigned idx, int loop)
|
||||
{
|
||||
return ((unsigned)idx + loop) & m_poolSize;
|
||||
return (idx + loop) & m_poolSize;
|
||||
}
|
||||
|
||||
|
||||
@ -65,7 +65,7 @@ namespace SPTAG
|
||||
~OptHashPosVector() {}
|
||||
|
||||
|
||||
void Init(int size)
|
||||
void Init(SizeType size)
|
||||
{
|
||||
m_secondHash = true;
|
||||
clear();
|
||||
@ -76,31 +76,31 @@ namespace SPTAG
|
||||
if (!m_secondHash)
|
||||
{
|
||||
// Clear first block.
|
||||
memset(&m_hashTable[0], 0, sizeof(int)*(m_poolSize + 1));
|
||||
memset(&m_hashTable[0], 0, sizeof(SizeType)*(m_poolSize + 1));
|
||||
}
|
||||
else
|
||||
{
|
||||
// Clear all blocks.
|
||||
memset(&m_hashTable[0], 0, 2 * sizeof(int) * (m_poolSize + 1));
|
||||
memset(&m_hashTable[0], 0, 2 * sizeof(SizeType) * (m_poolSize + 1));
|
||||
m_secondHash = false;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Calls _CheckAndSet on the start of m_hashTable with idx + 1 and returns true
// when that call yields 0.
// NOTE(review): int and SizeType signatures are interleaved here from a
// rendered diff.
inline bool CheckAndSet(int idx)
|
||||
inline bool CheckAndSet(SizeType idx)
|
||||
{
|
||||
// Stored indices are 1-based; the table is cleared to 0 by memset, so 0
// presumably marks an empty slot — confirm against _CheckAndSet.
|
||||
return _CheckAndSet(&m_hashTable[0], idx + 1) == 0;
|
||||
}
|
||||
|
||||
|
||||
inline int _CheckAndSet(int* hashTable, int idx)
|
||||
inline int _CheckAndSet(SizeType* hashTable, SizeType idx)
|
||||
{
|
||||
unsigned index, loop;
|
||||
unsigned index;
|
||||
|
||||
// Get first hash position.
|
||||
index = hash_func(idx);
|
||||
for (loop = 0; loop < m_maxLoop; ++loop)
|
||||
index = hash_func((unsigned)idx);
|
||||
for (int loop = 0; loop < m_maxLoop; ++loop)
|
||||
{
|
||||
if (!hashTable[index])
|
||||
{
|
||||
@ -132,7 +132,7 @@ namespace SPTAG
|
||||
// Variables for each single NN search
|
||||
struct WorkSpace
|
||||
{
|
||||
void Initialize(int maxCheck, int dataSize)
|
||||
void Initialize(int maxCheck, SizeType dataSize)
|
||||
{
|
||||
nodeCheckStatus.Init(dataSize);
|
||||
m_SPTQueue.Resize(maxCheck * 10);
|
||||
@ -158,7 +158,7 @@ namespace SPTAG
|
||||
m_iNumOfContinuousNoBetterPropagation = 0;
|
||||
}
|
||||
|
||||
inline bool CheckAndSet(int idx)
|
||||
inline bool CheckAndSet(SizeType idx)
|
||||
{
|
||||
return nodeCheckStatus.CheckAndSet(idx);
|
||||
}
|
||||
|
@ -17,7 +17,7 @@ namespace COMMON
|
||||
class WorkSpacePool
|
||||
{
|
||||
public:
|
||||
WorkSpacePool(int p_maxCheck, int p_vectorCount);
|
||||
WorkSpacePool(int p_maxCheck, SizeType p_vectorCount);
|
||||
|
||||
virtual ~WorkSpacePool();
|
||||
|
||||
@ -34,7 +34,7 @@ private:
|
||||
|
||||
int m_maxCheck;
|
||||
|
||||
int m_vectorCount;
|
||||
SizeType m_vectorCount;
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -4,53 +4,223 @@
|
||||
#ifndef _SPTAG_COMMONDATASTRUCTURE_H_
|
||||
#define _SPTAG_COMMONDATASTRUCTURE_H_
|
||||
|
||||
#include "Common.h"
|
||||
#include "inc/Core/Common.h"
|
||||
|
||||
namespace SPTAG
|
||||
{
|
||||
|
||||
class ByteArray
|
||||
template<typename T>
|
||||
class Array
|
||||
{
|
||||
public:
|
||||
ByteArray();
|
||||
Array();
|
||||
|
||||
ByteArray(ByteArray&& p_right);
|
||||
|
||||
ByteArray(std::uint8_t* p_array, std::size_t p_length, bool p_transferOnwership);
|
||||
|
||||
ByteArray(std::uint8_t* p_array, std::size_t p_length, std::shared_ptr<std::uint8_t> p_dataHolder);
|
||||
|
||||
ByteArray(const ByteArray& p_right);
|
||||
|
||||
ByteArray& operator= (const ByteArray& p_right);
|
||||
|
||||
ByteArray& operator= (ByteArray&& p_right);
|
||||
|
||||
~ByteArray();
|
||||
|
||||
static ByteArray Alloc(std::size_t p_length);
|
||||
|
||||
std::uint8_t* Data() const;
|
||||
|
||||
std::size_t Length() const;
|
||||
Array(T* p_array, std::size_t p_length, bool p_transferOwnership);
|
||||
|
||||
void SetData(std::uint8_t* p_array, std::size_t p_length);
|
||||
Array(T* p_array, std::size_t p_length, std::shared_ptr<T> p_dataHolder);
|
||||
|
||||
std::shared_ptr<std::uint8_t> DataHolder() const;
|
||||
Array(Array<T>&& p_right);
|
||||
|
||||
Array(const Array<T>& p_right);
|
||||
|
||||
Array<T>& operator= (Array<T>&& p_right);
|
||||
|
||||
Array<T>& operator= (const Array<T>& p_right);
|
||||
|
||||
T& operator[] (std::size_t p_index);
|
||||
|
||||
const T& operator[] (std::size_t p_index) const;
|
||||
|
||||
~Array();
|
||||
|
||||
T* Data() const;
|
||||
|
||||
std::size_t Length() const;
|
||||
|
||||
std::shared_ptr<T> DataHolder() const;
|
||||
|
||||
void Set(T* p_array, std::size_t p_length, bool p_transferOwnership);
|
||||
|
||||
void Clear();
|
||||
|
||||
const static ByteArray c_empty;
|
||||
static Array<T> Alloc(std::size_t p_length);
|
||||
|
||||
const static Array<T> c_empty;
|
||||
|
||||
private:
|
||||
std::uint8_t* m_data;
|
||||
T* m_data;
|
||||
|
||||
std::size_t m_length;
|
||||
|
||||
// Note: this holds an array, so it must be set with an array deleter.
|
||||
std::shared_ptr<std::uint8_t> m_dataHolder;
|
||||
std::shared_ptr<T> m_dataHolder;
|
||||
};
|
||||
|
||||
template<typename T>
|
||||
const Array<T> Array<T>::c_empty;
|
||||
|
||||
|
||||
template<typename T>
|
||||
Array<T>::Array()
|
||||
: m_data(nullptr),
|
||||
m_length(0)
|
||||
{
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
Array<T>::Array(T* p_array, std::size_t p_length, bool p_transferOnwership)
|
||||
|
||||
: m_data(p_array),
|
||||
m_length(p_length)
|
||||
{
|
||||
if (p_transferOnwership)
|
||||
{
|
||||
m_dataHolder.reset(m_data, std::default_delete<T[]>());
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template<typename T>
|
||||
Array<T>::Array(T* p_array, std::size_t p_length, std::shared_ptr<T> p_dataHolder)
|
||||
: m_data(p_array),
|
||||
m_length(p_length),
|
||||
m_dataHolder(std::move(p_dataHolder))
|
||||
{
|
||||
}
|
||||
|
||||
|
||||
template<typename T>
|
||||
Array<T>::Array(Array<T>&& p_right)
|
||||
: m_data(p_right.m_data),
|
||||
m_length(p_right.m_length),
|
||||
m_dataHolder(std::move(p_right.m_dataHolder))
|
||||
{
|
||||
}
|
||||
|
||||
|
||||
template<typename T>
|
||||
Array<T>::Array(const Array<T>& p_right)
|
||||
: m_data(p_right.m_data),
|
||||
m_length(p_right.m_length),
|
||||
m_dataHolder(p_right.m_dataHolder)
|
||||
{
|
||||
}
|
||||
|
||||
|
||||
template<typename T>
|
||||
Array<T>&
|
||||
Array<T>::operator= (Array<T>&& p_right)
|
||||
{
|
||||
m_data = p_right.m_data;
|
||||
m_length = p_right.m_length;
|
||||
m_dataHolder = std::move(p_right.m_dataHolder);
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
|
||||
template<typename T>
|
||||
Array<T>&
|
||||
Array<T>::operator= (const Array<T>& p_right)
|
||||
{
|
||||
m_data = p_right.m_data;
|
||||
m_length = p_right.m_length;
|
||||
m_dataHolder = p_right.m_dataHolder;
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
|
||||
template<typename T>
|
||||
T&
|
||||
Array<T>::operator[] (std::size_t p_index)
|
||||
{
|
||||
return m_data[p_index];
|
||||
}
|
||||
|
||||
|
||||
template<typename T>
|
||||
const T&
|
||||
Array<T>::operator[] (std::size_t p_index) const
|
||||
{
|
||||
return m_data[p_index];
|
||||
}
|
||||
|
||||
|
||||
template<typename T>
|
||||
Array<T>::~Array()
|
||||
{
|
||||
}
|
||||
|
||||
|
||||
template<typename T>
|
||||
T*
|
||||
Array<T>::Data() const
|
||||
{
|
||||
return m_data;
|
||||
}
|
||||
|
||||
|
||||
template<typename T>
|
||||
std::size_t
|
||||
Array<T>::Length() const
|
||||
{
|
||||
return m_length;
|
||||
}
|
||||
|
||||
|
||||
template<typename T>
|
||||
std::shared_ptr<T>
|
||||
Array<T>::DataHolder() const
|
||||
{
|
||||
return m_dataHolder;
|
||||
}
|
||||
|
||||
|
||||
template<typename T>
|
||||
void
|
||||
Array<T>::Set(T* p_array, std::size_t p_length, bool p_transferOwnership)
|
||||
{
|
||||
m_data = p_array;
|
||||
m_length = p_length;
|
||||
|
||||
if (p_transferOwnership)
|
||||
{
|
||||
m_dataHolder.reset(m_data, std::default_delete<T[]>());
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template<typename T>
|
||||
void
|
||||
Array<T>::Clear()
|
||||
{
|
||||
m_data = nullptr;
|
||||
m_length = 0;
|
||||
m_dataHolder.reset();
|
||||
}
|
||||
|
||||
|
||||
template<typename T>
|
||||
Array<T>
|
||||
Array<T>::Alloc(std::size_t p_length)
|
||||
{
|
||||
Array<T> arr;
|
||||
if (0 == p_length)
|
||||
{
|
||||
return arr;
|
||||
}
|
||||
|
||||
arr.m_dataHolder.reset(new T[p_length], std::default_delete<T[]>());
|
||||
|
||||
arr.m_length = p_length;
|
||||
arr.m_data = arr.m_dataHolder.get();
|
||||
return arr;
|
||||
}
|
||||
|
||||
|
||||
typedef Array<std::uint8_t> ByteArray;
|
||||
|
||||
} // namespace SPTAG
|
||||
|
||||
#endif // _SPTAG_COMMONDATASTRUCTURE_H_
|
||||
|
@ -28,6 +28,8 @@ DefineErrorCode(FailedOpenFile, 0x0002)
|
||||
DefineErrorCode(FailedCreateFile, 0x0003)
|
||||
DefineErrorCode(ParamNotFound, 0x0010)
|
||||
DefineErrorCode(FailedParseValue, 0x0011)
|
||||
DefineErrorCode(MemoryOverFlow, 0x0012)
|
||||
DefineErrorCode(LackOfInputs, 0x0013)
|
||||
|
||||
// 0x1000 ~ 0x1FFF Index Build Status
|
||||
|
||||
|
@ -15,12 +15,12 @@
|
||||
#include "../Common/WorkSpacePool.h"
|
||||
#include "../Common/RelativeNeighborhoodGraph.h"
|
||||
#include "../Common/KDTree.h"
|
||||
#include "inc/Helper/ConcurrentSet.h"
|
||||
#include "inc/Helper/StringConvert.h"
|
||||
#include "inc/Helper/SimpleIniReader.h"
|
||||
|
||||
#include <functional>
|
||||
#include <mutex>
|
||||
#include <tbb/concurrent_unordered_set.h>
|
||||
|
||||
namespace SPTAG
|
||||
{
|
||||
@ -48,14 +48,16 @@ namespace SPTAG
|
||||
std::string m_sKDTFilename;
|
||||
std::string m_sGraphFilename;
|
||||
std::string m_sDataPointsFilename;
|
||||
std::string m_sDeleteDataPointsFilename;
|
||||
|
||||
std::mutex m_dataLock; // protect data and graph
|
||||
tbb::concurrent_unordered_set<int> m_deletedID;
|
||||
std::mutex m_dataAddLock; // protect data and graph
|
||||
Helper::Concurrent::ConcurrentSet<SizeType> m_deletedID;
|
||||
float m_fDeletePercentageForRefine;
|
||||
std::unique_ptr<COMMON::WorkSpacePool> m_workSpacePool;
|
||||
|
||||
int m_iNumberOfThreads;
|
||||
DistCalcMethod m_iDistCalcMethod;
|
||||
float(*m_fComputeDistance)(const T* pX, const T* pY, int length);
|
||||
float(*m_fComputeDistance)(const T* pX, const T* pY, DimensionType length);
|
||||
|
||||
int m_iMaxCheck;
|
||||
int m_iThresholdOfNumberOfContinuousNoBetterPropagation;
|
||||
@ -63,20 +65,21 @@ namespace SPTAG
|
||||
int m_iNumberOfOtherDynamicPivots;
|
||||
public:
|
||||
Index()
|
||||
{
|
||||
{
|
||||
#define DefineKDTParameter(VarName, VarType, DefaultValue, RepresentStr) \
|
||||
VarName = DefaultValue; \
|
||||
|
||||
#include "inc/Core/KDT/ParameterDefinitionList.h"
|
||||
#undef DefineKDTParameter
|
||||
|
||||
m_fComputeDistance = COMMON::DistanceCalcSelector<T>(m_iDistCalcMethod);
|
||||
}
|
||||
|
||||
m_pSamples.SetName("Vector");
|
||||
m_fComputeDistance = COMMON::DistanceCalcSelector<T>(m_iDistCalcMethod);
|
||||
}
|
||||
|
||||
~Index() {}
|
||||
|
||||
inline int GetNumSamples() const { return m_pSamples.R(); }
|
||||
inline int GetFeatureDim() const { return m_pSamples.C(); }
|
||||
inline SizeType GetNumSamples() const { return m_pSamples.R(); }
|
||||
inline DimensionType GetFeatureDim() const { return m_pSamples.C(); }
|
||||
|
||||
inline int GetCurrMaxCheck() const { return m_iMaxCheck; }
|
||||
inline int GetNumThreads() const { return m_iNumberOfThreads; }
|
||||
@ -85,25 +88,41 @@ namespace SPTAG
|
||||
inline VectorValueType GetVectorValueType() const { return GetEnumValueType<T>(); }
|
||||
|
||||
inline float ComputeDistance(const void* pX, const void* pY) const { return m_fComputeDistance((const T*)pX, (const T*)pY, m_pSamples.C()); }
|
||||
inline const void* GetSample(const int idx) const { return (void*)m_pSamples[idx]; }
|
||||
inline const void* GetSample(const SizeType idx) const { return (void*)m_pSamples[idx]; }
|
||||
inline bool ContainSample(const SizeType idx) const { return !m_deletedID.contains(idx); }
|
||||
inline bool NeedRefine() const { return m_deletedID.size() >= (size_t)(GetNumSamples() * m_fDeletePercentageForRefine); }
|
||||
std::shared_ptr<std::vector<std::uint64_t>> BufferSize() const
|
||||
{
|
||||
std::shared_ptr<std::vector<std::uint64_t>> buffersize(new std::vector<std::uint64_t>);
|
||||
buffersize->push_back(m_pSamples.BufferSize());
|
||||
buffersize->push_back(m_pTrees.BufferSize());
|
||||
buffersize->push_back(m_pGraph.BufferSize());
|
||||
buffersize->push_back(m_deletedID.bufferSize());
|
||||
return std::move(buffersize);
|
||||
}
|
||||
|
||||
ErrorCode BuildIndex(const void* p_data, int p_vectorNum, int p_dimension);
|
||||
ErrorCode SaveConfig(std::ostream& p_configout) const;
|
||||
ErrorCode SaveIndexData(const std::string& p_folderPath);
|
||||
ErrorCode SaveIndexData(const std::vector<std::ostream*>& p_indexStreams);
|
||||
|
||||
ErrorCode SaveIndexToMemory(std::vector<void*>& p_indexBlobs, std::vector<int64_t>& p_indexBlobsLen);
|
||||
ErrorCode LoadIndexFromMemory(const std::vector<void*>& p_indexBlobs);
|
||||
ErrorCode LoadConfig(Helper::IniReader& p_reader);
|
||||
ErrorCode LoadIndexData(const std::string& p_folderPath);
|
||||
ErrorCode LoadIndexDataFromMemory(const std::vector<ByteArray>& p_indexBlobs);
|
||||
|
||||
ErrorCode SaveIndex(const std::string& p_folderPath, std::ofstream& p_configout);
|
||||
ErrorCode LoadIndex(const std::string& p_folderPath, Helper::IniReader& p_reader);
|
||||
ErrorCode BuildIndex(const void* p_data, SizeType p_vectorNum, DimensionType p_dimension);
|
||||
ErrorCode SearchIndex(QueryResult &p_query) const;
|
||||
ErrorCode AddIndex(const void* p_vectors, int p_vectorNum, int p_dimension);
|
||||
ErrorCode DeleteIndex(const void* p_vectors, int p_vectorNum);
|
||||
ErrorCode AddIndex(const void* p_vectors, SizeType p_vectorNum, DimensionType p_dimension, SizeType* p_start = nullptr);
|
||||
ErrorCode DeleteIndex(const void* p_vectors, SizeType p_vectorNum);
|
||||
ErrorCode DeleteIndex(const SizeType& p_id);
|
||||
|
||||
ErrorCode SetParameter(const char* p_param, const char* p_value);
|
||||
std::string GetParameter(const char* p_param) const;
|
||||
|
||||
private:
|
||||
ErrorCode RefineIndex(const std::string& p_folderPath);
|
||||
void SearchIndexWithDeleted(COMMON::QueryResultSet<T> &p_query, COMMON::WorkSpace &p_space, const tbb::concurrent_unordered_set<int> &p_deleted) const;
|
||||
ErrorCode RefineIndex(const std::vector<std::ostream*>& p_indexStreams);
|
||||
|
||||
private:
|
||||
void SearchIndexWithDeleted(COMMON::QueryResultSet<T> &p_query, COMMON::WorkSpace &p_space, const Helper::Concurrent::ConcurrentSet<SizeType> &p_deleted) const;
|
||||
void SearchIndexWithoutDeleted(COMMON::QueryResultSet<T> &p_query, COMMON::WorkSpace &p_space) const;
|
||||
};
|
||||
} // namespace KDT
|
||||
|
@ -7,16 +7,17 @@
|
||||
DefineKDTParameter(m_sKDTFilename, std::string, std::string("tree.bin"), "TreeFilePath")
|
||||
DefineKDTParameter(m_sGraphFilename, std::string, std::string("graph.bin"), "GraphFilePath")
|
||||
DefineKDTParameter(m_sDataPointsFilename, std::string, std::string("vectors.bin"), "VectorFilePath")
|
||||
DefineKDTParameter(m_sDeleteDataPointsFilename, std::string, std::string("deletes.bin"), "DeleteVectorFilePath")
|
||||
|
||||
DefineKDTParameter(m_pTrees.m_iTreeNumber, int, 1L, "KDTNumber")
|
||||
DefineKDTParameter(m_pTrees.m_numTopDimensionKDTSplit, int, 5L, "NumTopDimensionKDTSplit")
|
||||
DefineKDTParameter(m_pTrees.m_iSamples, int, 100L, "NumSamplesKDTSplitConsideration")
|
||||
DefineKDTParameter(m_pTrees.m_iSamples, int, 100L, "Samples")
|
||||
|
||||
DefineKDTParameter(m_pGraph.m_iTPTNumber, int, 32L, "TPTNumber")
|
||||
DefineKDTParameter(m_pGraph.m_iTPTLeafSize, int, 2000L, "TPTLeafSize")
|
||||
DefineKDTParameter(m_pGraph.m_numTopDimensionTPTSplit, int, 5L, "NumTopDimensionTPTSplit")
|
||||
|
||||
DefineKDTParameter(m_pGraph.m_iNeighborhoodSize, int, 32L, "NeighborhoodSize")
|
||||
DefineKDTParameter(m_pGraph.m_iNeighborhoodSize, DimensionType, 32L, "NeighborhoodSize")
|
||||
DefineKDTParameter(m_pGraph.m_iNeighborhoodScale, int, 2L, "GraphNeighborhoodScale")
|
||||
DefineKDTParameter(m_pGraph.m_iCEFScale, int, 2L, "GraphCEFScale")
|
||||
DefineKDTParameter(m_pGraph.m_iRefineIter, int, 0L, "RefineIterations")
|
||||
@ -26,6 +27,7 @@ DefineKDTParameter(m_pGraph.m_iMaxCheckForRefineGraph, int, 10000L, "MaxCheckFor
|
||||
DefineKDTParameter(m_iNumberOfThreads, int, 1L, "NumberOfThreads")
|
||||
DefineKDTParameter(m_iDistCalcMethod, SPTAG::DistCalcMethod, SPTAG::DistCalcMethod::Cosine, "DistCalcMethod")
|
||||
|
||||
DefineKDTParameter(m_fDeletePercentageForRefine, float, 0.4F, "DeletePercentageForRefine")
|
||||
DefineKDTParameter(m_iMaxCheck, int, 8192L, "MaxCheck")
|
||||
DefineKDTParameter(m_iThresholdOfNumberOfContinuousNoBetterPropagation, int, 3L, "ThresholdOfNumberOfContinuousNoBetterPropagation")
|
||||
DefineKDTParameter(m_iNumberOfInitialDynamicPivots, int, 50L, "NumberOfInitialDynamicPivots")
|
||||
|
@ -19,23 +19,23 @@ public:
|
||||
|
||||
virtual ~MetadataSet();
|
||||
|
||||
virtual ByteArray GetMetadata(IndexType p_vectorID) const = 0;
|
||||
virtual ByteArray GetMetadata(SizeType p_vectorID) const = 0;
|
||||
|
||||
virtual SizeType Count() const = 0;
|
||||
|
||||
virtual bool Available() const = 0;
|
||||
|
||||
virtual std::pair<std::uint64_t, std::uint64_t> BufferSize() const = 0;
|
||||
|
||||
virtual void AddBatch(MetadataSet& data) = 0;
|
||||
|
||||
virtual ErrorCode SaveMetadata(std::ostream& p_metaOut, std::ostream& p_metaIndexOut) = 0;
|
||||
|
||||
virtual ErrorCode SaveMetadata(const std::string& p_metaFile, const std::string& p_metaindexFile) = 0;
|
||||
|
||||
virtual ErrorCode SaveMetadataToMemory(void **pGraphMemFile, int64_t &len) = 0;
|
||||
virtual ErrorCode RefineMetadata(std::vector<SizeType>& indices, std::ostream& p_metaOut, std::ostream& p_metaIndexOut);
|
||||
|
||||
virtual ErrorCode LoadMetadataFromMemory(void *pGraphMemFile) = 0;
|
||||
|
||||
virtual ErrorCode RefineMetadata(std::vector<int>& indices, const std::string& p_folderPath);
|
||||
|
||||
static ErrorCode MetaCopy(const std::string& p_src, const std::string& p_dst);
|
||||
virtual ErrorCode RefineMetadata(std::vector<SizeType>& indices, const std::string& p_metaFile, const std::string& p_metaindexFile);
|
||||
};
|
||||
|
||||
|
||||
@ -46,19 +46,20 @@ public:
|
||||
|
||||
~FileMetadataSet();
|
||||
|
||||
ByteArray GetMetadata(IndexType p_vectorID) const;
|
||||
ByteArray GetMetadata(SizeType p_vectorID) const;
|
||||
|
||||
SizeType Count() const;
|
||||
|
||||
bool Available() const;
|
||||
|
||||
std::pair<std::uint64_t, std::uint64_t> BufferSize() const;
|
||||
|
||||
void AddBatch(MetadataSet& data);
|
||||
|
||||
ErrorCode SaveMetadata(std::ostream& p_metaOut, std::ostream& p_metaIndexOut);
|
||||
|
||||
ErrorCode SaveMetadata(const std::string& p_metaFile, const std::string& p_metaindexFile);
|
||||
|
||||
ErrorCode SaveMetadataToMemory(void **pGraphMemFile, int64_t &len);
|
||||
|
||||
ErrorCode LoadMetadataFromMemory(void *pGraphMemFile);
|
||||
private:
|
||||
std::ifstream* m_fp = nullptr;
|
||||
|
||||
@ -77,25 +78,24 @@ private:
|
||||
class MemMetadataSet : public MetadataSet
|
||||
{
|
||||
public:
|
||||
MemMetadataSet() = default;
|
||||
|
||||
MemMetadataSet(ByteArray p_metadata, ByteArray p_offsets, SizeType p_count);
|
||||
|
||||
~MemMetadataSet();
|
||||
|
||||
ByteArray GetMetadata(IndexType p_vectorID) const;
|
||||
ByteArray GetMetadata(SizeType p_vectorID) const;
|
||||
|
||||
SizeType Count() const;
|
||||
|
||||
bool Available() const;
|
||||
|
||||
std::pair<std::uint64_t, std::uint64_t> BufferSize() const;
|
||||
|
||||
void AddBatch(MetadataSet& data);
|
||||
|
||||
ErrorCode SaveMetadata(std::ostream& p_metaOut, std::ostream& p_metaIndexOut);
|
||||
|
||||
ErrorCode SaveMetadata(const std::string& p_metaFile, const std::string& p_metaindexFile);
|
||||
|
||||
ErrorCode SaveMetadataToMemory(void **pGraphMemFile, int64_t &len);
|
||||
|
||||
ErrorCode LoadMetadataFromMemory(void *pGraphMemFile);
|
||||
private:
|
||||
std::vector<std::uint64_t> m_offsets;
|
||||
|
||||
|
@ -4,24 +4,13 @@
|
||||
#ifndef _SPTAG_SEARCHQUERY_H_
|
||||
#define _SPTAG_SEARCHQUERY_H_
|
||||
|
||||
#include "CommonDataStructure.h"
|
||||
#include "SearchResult.h"
|
||||
|
||||
#include <cstring>
|
||||
|
||||
namespace SPTAG
|
||||
{
|
||||
|
||||
struct BasicResult
|
||||
{
|
||||
int VID;
|
||||
float Dist;
|
||||
|
||||
BasicResult() : VID(-1), Dist(MaxDist) {}
|
||||
|
||||
BasicResult(int p_vid, float p_dist) : VID(p_vid), Dist(p_dist) {}
|
||||
};
|
||||
|
||||
|
||||
// Space to hold temporary answers, similar to TopKCache
|
||||
class QueryResult
|
||||
{
|
||||
@ -38,39 +27,26 @@ public:
|
||||
|
||||
|
||||
QueryResult(const void* p_target, int p_resultNum, bool p_withMeta)
|
||||
: m_target(nullptr),
|
||||
m_resultNum(0),
|
||||
m_withMeta(false)
|
||||
{
|
||||
Init(p_target, p_resultNum, p_withMeta);
|
||||
}
|
||||
|
||||
|
||||
QueryResult(const void* p_target, int p_resultNum, std::vector<BasicResult>& p_results)
|
||||
QueryResult(const void* p_target, int p_resultNum, bool p_withMeta, BasicResult* p_results)
|
||||
: m_target(p_target),
|
||||
m_resultNum(p_resultNum),
|
||||
m_withMeta(false)
|
||||
m_withMeta(p_withMeta)
|
||||
{
|
||||
p_results.resize(p_resultNum);
|
||||
m_results.reset(p_results.data());
|
||||
m_results.Set(p_results, p_resultNum, false);
|
||||
}
|
||||
|
||||
|
||||
QueryResult(const QueryResult& p_other)
|
||||
: m_target(p_other.m_target),
|
||||
m_resultNum(p_other.m_resultNum),
|
||||
m_withMeta(p_other.m_withMeta)
|
||||
{
|
||||
Init(p_other.m_target, p_other.m_resultNum, p_other.m_withMeta);
|
||||
if (m_resultNum > 0)
|
||||
{
|
||||
m_results.reset(new BasicResult[m_resultNum]);
|
||||
std::memcpy(m_results.get(), p_other.m_results.get(), sizeof(BasicResult) * m_resultNum);
|
||||
|
||||
if (m_withMeta)
|
||||
{
|
||||
m_metadatas.reset(new ByteArray[m_resultNum]);
|
||||
std::copy(p_other.m_metadatas.get(), p_other.m_metadatas.get() + m_resultNum, m_metadatas.get());
|
||||
}
|
||||
std::copy(p_other.m_results.Data(), p_other.m_results.Data() + m_resultNum, m_results.Data());
|
||||
}
|
||||
}
|
||||
|
||||
@ -78,14 +54,9 @@ public:
|
||||
QueryResult& operator=(const QueryResult& p_other)
|
||||
{
|
||||
Init(p_other.m_target, p_other.m_resultNum, p_other.m_withMeta);
|
||||
|
||||
if (m_resultNum > 0)
|
||||
{
|
||||
std::memcpy(m_results.get(), p_other.m_results.get(), sizeof(BasicResult) * m_resultNum);
|
||||
if (m_withMeta)
|
||||
{
|
||||
std::copy(p_other.m_metadatas.get(), p_other.m_metadatas.get() + m_resultNum, m_metadatas.get());
|
||||
}
|
||||
std::copy(p_other.m_results.Data(), p_other.m_results.Data() + m_resultNum, m_results.Data());
|
||||
}
|
||||
|
||||
return *this;
|
||||
@ -100,18 +71,10 @@ public:
|
||||
inline void Init(const void* p_target, int p_resultNum, bool p_withMeta)
|
||||
{
|
||||
m_target = p_target;
|
||||
if (p_resultNum > m_resultNum)
|
||||
{
|
||||
m_results.reset(new BasicResult[p_resultNum]);
|
||||
}
|
||||
|
||||
if (p_withMeta && (!m_withMeta || p_resultNum > m_resultNum))
|
||||
{
|
||||
m_metadatas.reset(new ByteArray[p_resultNum]);
|
||||
}
|
||||
|
||||
m_resultNum = p_resultNum;
|
||||
m_withMeta = p_withMeta;
|
||||
|
||||
m_results = Array<BasicResult>::Alloc(p_resultNum);
|
||||
}
|
||||
|
||||
|
||||
@ -135,11 +98,11 @@ public:
|
||||
|
||||
inline BasicResult* GetResult(int i) const
|
||||
{
|
||||
return i < m_resultNum ? m_results.get() + i : nullptr;
|
||||
return i < m_resultNum ? m_results.Data() + i : nullptr;
|
||||
}
|
||||
|
||||
|
||||
inline void SetResult(int p_index, int p_VID, float p_dist)
|
||||
inline void SetResult(int p_index, SizeType p_VID, float p_dist)
|
||||
{
|
||||
if (p_index < m_resultNum)
|
||||
{
|
||||
@ -151,7 +114,7 @@ public:
|
||||
|
||||
inline BasicResult* GetResults() const
|
||||
{
|
||||
return m_results.get();
|
||||
return m_results.Data();
|
||||
}
|
||||
|
||||
|
||||
@ -165,7 +128,7 @@ public:
|
||||
{
|
||||
if (p_index < m_resultNum && m_withMeta)
|
||||
{
|
||||
return m_metadatas[p_index];
|
||||
return m_results[p_index].Meta;
|
||||
}
|
||||
|
||||
return ByteArray::c_empty;
|
||||
@ -176,7 +139,7 @@ public:
|
||||
{
|
||||
if (p_index < m_resultNum && m_withMeta)
|
||||
{
|
||||
m_metadatas[p_index] = std::move(p_metadata);
|
||||
m_results[p_index].Meta = std::move(p_metadata);
|
||||
}
|
||||
}
|
||||
|
||||
@ -187,39 +150,32 @@ public:
|
||||
{
|
||||
m_results[i].VID = -1;
|
||||
m_results[i].Dist = MaxDist;
|
||||
}
|
||||
|
||||
if (m_withMeta)
|
||||
{
|
||||
for (int i = 0; i < m_resultNum; i++)
|
||||
{
|
||||
m_metadatas[i].Clear();
|
||||
}
|
||||
m_results[i].Meta.Clear();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
iterator begin()
|
||||
{
|
||||
return m_results.get();
|
||||
return m_results.Data();
|
||||
}
|
||||
|
||||
|
||||
iterator end()
|
||||
{
|
||||
return m_results.get() + m_resultNum;
|
||||
return m_results.Data() + m_resultNum;
|
||||
}
|
||||
|
||||
|
||||
const_iterator begin() const
|
||||
{
|
||||
return m_results.get();
|
||||
return m_results.Data();
|
||||
}
|
||||
|
||||
|
||||
const_iterator end() const
|
||||
{
|
||||
return m_results.get() + m_resultNum;
|
||||
return m_results.Data() + m_resultNum;
|
||||
}
|
||||
|
||||
|
||||
@ -230,9 +186,7 @@ protected:
|
||||
|
||||
bool m_withMeta;
|
||||
|
||||
std::unique_ptr<BasicResult[]> m_results;
|
||||
|
||||
std::unique_ptr<ByteArray[]> m_metadatas;
|
||||
Array<BasicResult> m_results;
|
||||
};
|
||||
} // namespace SPTAG
|
||||
|
||||
|
26
core/src/index/thirdparty/SPTAG/AnnService/inc/Core/SearchResult.h
vendored
Normal file
26
core/src/index/thirdparty/SPTAG/AnnService/inc/Core/SearchResult.h
vendored
Normal file
@ -0,0 +1,26 @@
|
||||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
#ifndef _SPTAG_SEARCHRESULT_H_
|
||||
#define _SPTAG_SEARCHRESULT_H_
|
||||
|
||||
#include "CommonDataStructure.h"
|
||||
|
||||
namespace SPTAG
|
||||
{
|
||||
struct BasicResult
|
||||
{
|
||||
SizeType VID;
|
||||
float Dist;
|
||||
ByteArray Meta;
|
||||
|
||||
BasicResult() : VID(-1), Dist(MaxDist) {}
|
||||
|
||||
BasicResult(SizeType p_vid, float p_dist) : VID(p_vid), Dist(p_dist) {}
|
||||
|
||||
BasicResult(SizeType p_vid, float p_dist, ByteArray p_meta) : VID(p_vid), Dist(p_dist), Meta(p_meta) {}
|
||||
};
|
||||
|
||||
} // namespace SPTAG
|
||||
|
||||
#endif // _SPTAG_SEARCHRESULT_H_
|
@ -10,6 +10,8 @@
|
||||
#include "MetadataSet.h"
|
||||
#include "inc/Helper/SimpleIniReader.h"
|
||||
|
||||
#include <unordered_map>
|
||||
|
||||
namespace SPTAG
|
||||
{
|
||||
|
||||
@ -20,59 +22,58 @@ public:
|
||||
|
||||
virtual ~VectorIndex();
|
||||
|
||||
virtual ErrorCode SaveIndex(const std::string& p_folderPath, std::ofstream& p_configout) = 0;
|
||||
virtual ErrorCode BuildIndex(const void* p_data, SizeType p_vectorNum, DimensionType p_dimension) = 0;
|
||||
|
||||
virtual ErrorCode LoadIndex(const std::string& p_folderPath, Helper::IniReader& p_reader) = 0;
|
||||
virtual ErrorCode AddIndex(const void* p_vectors, SizeType p_vectorNum, DimensionType p_dimension, SizeType* p_start = nullptr) = 0;
|
||||
|
||||
virtual ErrorCode SaveIndexToMemory(std::vector<void*>& p_indexBlobs, std::vector<int64_t>& p_indexBlobsLen) = 0;
|
||||
|
||||
virtual ErrorCode LoadIndexFromMemory(const std::vector<void*>& p_indexBlobs) = 0;
|
||||
|
||||
virtual ErrorCode BuildIndex(const void* p_data, int p_vectorNum, int p_dimension) = 0;
|
||||
virtual ErrorCode DeleteIndex(const void* p_vectors, SizeType p_vectorNum) = 0;
|
||||
|
||||
virtual ErrorCode SearchIndex(QueryResult& p_results) const = 0;
|
||||
|
||||
virtual ErrorCode AddIndex(const void* p_vectors, int p_vectorNum, int p_dimension) = 0;
|
||||
|
||||
virtual ErrorCode DeleteIndex(const void* p_vectors, int p_vectorNum) = 0;
|
||||
|
||||
//virtual ErrorCode AddIndexWithID(const void* p_vector, const int& p_id) = 0;
|
||||
|
||||
//virtual ErrorCode DeleteIndexWithID(const void* p_vector, const int& p_id) = 0;
|
||||
|
||||
virtual float ComputeDistance(const void* pX, const void* pY) const = 0;
|
||||
virtual const void* GetSample(const int idx) const = 0;
|
||||
virtual int GetFeatureDim() const = 0;
|
||||
virtual int GetNumSamples() const = 0;
|
||||
virtual const void* GetSample(const SizeType idx) const = 0;
|
||||
virtual bool ContainSample(const SizeType idx) const = 0;
|
||||
virtual bool NeedRefine() const = 0;
|
||||
|
||||
virtual DimensionType GetFeatureDim() const = 0;
|
||||
virtual SizeType GetNumSamples() const = 0;
|
||||
|
||||
virtual DistCalcMethod GetDistCalcMethod() const = 0;
|
||||
virtual IndexAlgoType GetIndexAlgoType() const = 0;
|
||||
virtual VectorValueType GetVectorValueType() const = 0;
|
||||
virtual int GetNumThreads() const = 0;
|
||||
|
||||
virtual std::string GetParameter(const char* p_param) const = 0;
|
||||
virtual ErrorCode SetParameter(const char* p_param, const char* p_value) = 0;
|
||||
|
||||
virtual std::shared_ptr<std::vector<std::uint64_t>> CalculateBufferSize() const;
|
||||
|
||||
virtual ErrorCode LoadIndex(const std::string& p_config, const std::vector<ByteArray>& p_indexBlobs);
|
||||
|
||||
virtual ErrorCode LoadIndex(const std::string& p_folderPath);
|
||||
|
||||
virtual ErrorCode SaveIndex(std::string& p_config, const std::vector<ByteArray>& p_indexBlobs);
|
||||
|
||||
virtual ErrorCode SaveIndex(const std::string& p_folderPath);
|
||||
|
||||
virtual ErrorCode BuildIndex(std::shared_ptr<VectorSet> p_vectorSet, std::shared_ptr<MetadataSet> p_metadataSet);
|
||||
|
||||
virtual ErrorCode SearchIndex(const void* p_vector, int p_neighborCount, std::vector<BasicResult>& p_results) const;
|
||||
virtual ErrorCode BuildIndex(std::shared_ptr<VectorSet> p_vectorSet, std::shared_ptr<MetadataSet> p_metadataSet, bool p_withMetaIndex = false);
|
||||
|
||||
virtual ErrorCode AddIndex(std::shared_ptr<VectorSet> p_vectorSet, std::shared_ptr<MetadataSet> p_metadataSet);
|
||||
|
||||
virtual ErrorCode DeleteIndex(ByteArray p_meta);
|
||||
|
||||
virtual const void* GetSample(ByteArray p_meta);
|
||||
|
||||
virtual ErrorCode SearchIndex(const void* p_vector, int p_neighborCount, bool p_withMeta, BasicResult* p_results) const;
|
||||
|
||||
virtual std::string GetParameter(const std::string& p_param) const;
|
||||
virtual ErrorCode SetParameter(const std::string& p_param, const std::string& p_value);
|
||||
|
||||
virtual ByteArray GetMetadata(IndexType p_vectorID) const;
|
||||
virtual ByteArray GetMetadata(SizeType p_vectorID) const;
|
||||
virtual void SetMetadata(const std::string& p_metadataFilePath, const std::string& p_metadataIndexPath);
|
||||
|
||||
virtual std::string GetIndexName() const
|
||||
{
|
||||
if (m_sIndexName == "")
|
||||
return Helper::Convert::ConvertToString(GetIndexAlgoType());
|
||||
if (m_sIndexName == "") return Helper::Convert::ConvertToString(GetIndexAlgoType());
|
||||
return m_sIndexName;
|
||||
}
|
||||
virtual void SetIndexName(std::string p_name) { m_sIndexName = p_name; }
|
||||
@ -83,9 +84,42 @@ public:
|
||||
|
||||
static ErrorCode LoadIndex(const std::string& p_loaderFilePath, std::shared_ptr<VectorIndex>& p_vectorIndex);
|
||||
|
||||
static ErrorCode LoadIndex(const std::string& p_config, const std::vector<ByteArray>& p_indexBlobs, std::shared_ptr<VectorIndex>& p_vectorIndex);
|
||||
|
||||
protected:
|
||||
virtual std::shared_ptr<std::vector<std::uint64_t>> BufferSize() const = 0;
|
||||
|
||||
virtual ErrorCode SaveConfig(std::ostream& p_configout) const = 0;
|
||||
|
||||
virtual ErrorCode SaveIndexData(const std::string& p_folderPath) = 0;
|
||||
|
||||
virtual ErrorCode SaveIndexData(const std::vector<std::ostream*>& p_indexStreams) = 0;
|
||||
|
||||
virtual ErrorCode LoadConfig(Helper::IniReader& p_reader) = 0;
|
||||
|
||||
virtual ErrorCode LoadIndexData(const std::string& p_folderPath) = 0;
|
||||
|
||||
virtual ErrorCode LoadIndexDataFromMemory(const std::vector<ByteArray>& p_indexBlobs) = 0;
|
||||
|
||||
virtual ErrorCode DeleteIndex(const SizeType& p_id) = 0;
|
||||
|
||||
virtual ErrorCode RefineIndex(const std::string& p_folderPath) = 0;
|
||||
|
||||
virtual ErrorCode RefineIndex(const std::vector<std::ostream*>& p_indexStreams) = 0;
|
||||
|
||||
private:
|
||||
void BuildMetaMapping();
|
||||
|
||||
ErrorCode LoadIndexConfig(Helper::IniReader& p_reader);
|
||||
|
||||
ErrorCode SaveIndexConfig(std::ostream& p_configOut);
|
||||
|
||||
protected:
|
||||
std::string m_sIndexName;
|
||||
std::string m_sMetadataFile = "metadata.bin";
|
||||
std::string m_sMetadataIndexFile = "metadataIndex.bin";
|
||||
std::shared_ptr<MetadataSet> m_pMetadata;
|
||||
std::unique_ptr<std::unordered_map<std::string, SizeType>> m_pMetaToVec;
|
||||
};
|
||||
|
||||
|
||||
|
@ -18,11 +18,11 @@ public:
|
||||
|
||||
virtual VectorValueType GetValueType() const = 0;
|
||||
|
||||
virtual void* GetVector(IndexType p_vectorID) const = 0;
|
||||
virtual void* GetVector(SizeType p_vectorID) const = 0;
|
||||
|
||||
virtual void* GetData() const = 0;
|
||||
|
||||
virtual SizeType Dimension() const = 0;
|
||||
virtual DimensionType Dimension() const = 0;
|
||||
|
||||
virtual SizeType Count() const = 0;
|
||||
|
||||
@ -37,18 +37,18 @@ class BasicVectorSet : public VectorSet
|
||||
public:
|
||||
BasicVectorSet(const ByteArray& p_bytesArray,
|
||||
VectorValueType p_valueType,
|
||||
SizeType p_dimension,
|
||||
DimensionType p_dimension,
|
||||
SizeType p_vectorCount);
|
||||
|
||||
virtual ~BasicVectorSet();
|
||||
|
||||
virtual VectorValueType GetValueType() const;
|
||||
|
||||
virtual void* GetVector(IndexType p_vectorID) const;
|
||||
virtual void* GetVector(SizeType p_vectorID) const;
|
||||
|
||||
virtual void* GetData() const;
|
||||
|
||||
virtual SizeType Dimension() const;
|
||||
virtual DimensionType Dimension() const;
|
||||
|
||||
virtual SizeType Count() const;
|
||||
|
||||
@ -61,7 +61,7 @@ private:
|
||||
|
||||
VectorValueType m_valueType;
|
||||
|
||||
SizeType m_dimension;
|
||||
DimensionType m_dimension;
|
||||
|
||||
SizeType m_vectorCount;
|
||||
|
||||
|
39
core/src/index/thirdparty/SPTAG/AnnService/inc/Helper/BufferStream.h
vendored
Normal file
39
core/src/index/thirdparty/SPTAG/AnnService/inc/Helper/BufferStream.h
vendored
Normal file
@ -0,0 +1,39 @@
|
||||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
#ifndef _SPTAG_HELPER_BUFFERSTREAM_H_
|
||||
#define _SPTAG_HELPER_BUFFERSTREAM_H_
|
||||
|
||||
#include <streambuf>
|
||||
#include <ostream>
|
||||
#include <memory>
|
||||
|
||||
namespace SPTAG
|
||||
{
|
||||
namespace Helper
|
||||
{
|
||||
// Output stream buffer whose put area is a caller-supplied character array.
// The array is neither copied nor freed by this object.
struct streambuf : public std::basic_streambuf<char>
{
    // Installs [buffer, buffer + size) as the put area.
    streambuf(char* buffer, size_t size)
    {
        char* const putBegin = buffer;
        char* const putEnd = buffer + size;
        setp(putBegin, putEnd);
    }
};
|
||||
|
||||
class obufferstream : public std::ostream
|
||||
{
|
||||
public:
|
||||
obufferstream(streambuf* buf, bool transferOwnership) : std::ostream(buf)
|
||||
{
|
||||
if (transferOwnership)
|
||||
m_bufHolder.reset(buf, std::default_delete<streambuf>());
|
||||
}
|
||||
|
||||
private:
|
||||
std::shared_ptr<streambuf> m_bufHolder;
|
||||
};
|
||||
} // namespace Helper
|
||||
} // namespace SPTAG
|
||||
|
||||
#endif // _SPTAG_HELPER_BUFFERSTREAM_H_
|
||||
|
148
core/src/index/thirdparty/SPTAG/AnnService/inc/Helper/ConcurrentSet.h
vendored
Normal file
148
core/src/index/thirdparty/SPTAG/AnnService/inc/Helper/ConcurrentSet.h
vendored
Normal file
@ -0,0 +1,148 @@
|
||||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
#ifndef _SPTAG_HELPER_CONCURRENTSET_H_
|
||||
#define _SPTAG_HELPER_CONCURRENTSET_H_
|
||||
|
||||
#include <cstdint>
#include <fstream>
#include <iostream>
#include <memory>
#include <mutex>
#include <shared_mutex>
#include <string>
#include <unordered_set>
|
||||
|
||||
namespace SPTAG
|
||||
{
|
||||
namespace Helper
|
||||
{
|
||||
namespace Concurrent
|
||||
{
|
||||
// Unordered set of T guarded by a std::shared_timed_mutex.
// size(), contains() and insert() acquire the lock themselves. The save/load
// members and bufferSize() do not; callers that need them synchronized can
// take the mutex returned by getLock().
template <typename T>
class ConcurrentSet
{
public:
    ConcurrentSet();

    ~ConcurrentSet();

    // Number of stored elements (shared lock).
    size_t size() const;

    // True when key is present (shared lock).
    bool contains(const T& key) const;

    // Adds key to the set (exclusive lock).
    void insert(const T& key);

    // Exposes the internal mutex for external synchronization.
    std::shared_timed_mutex& getLock();

    // Serialization: a SizeType element count followed by the raw bytes of
    // each element.
    bool save(std::ostream& output);

    bool save(std::string filename);

    bool load(std::string filename);

    bool load(char* pmemoryFile);

    // Number of bytes save() writes for the current contents.
    std::uint64_t bufferSize() const;

private:
    std::unique_ptr<std::shared_timed_mutex> m_lock;
    std::unordered_set<T> m_data;
};
|
||||
|
||||
template<typename T>
|
||||
ConcurrentSet<T>::ConcurrentSet()
|
||||
{
|
||||
m_lock.reset(new std::shared_timed_mutex);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
ConcurrentSet<T>::~ConcurrentSet()
|
||||
{
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
size_t ConcurrentSet<T>::size() const
|
||||
{
|
||||
std::shared_lock<std::shared_timed_mutex> lock(*m_lock);
|
||||
return m_data.size();
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
bool ConcurrentSet<T>::contains(const T& key) const
|
||||
{
|
||||
std::shared_lock<std::shared_timed_mutex> lock(*m_lock);
|
||||
return (m_data.find(key) != m_data.end());
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void ConcurrentSet<T>::insert(const T& key)
|
||||
{
|
||||
std::unique_lock<std::shared_timed_mutex> lock(*m_lock);
|
||||
m_data.insert(key);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
std::shared_timed_mutex& ConcurrentSet<T>::getLock()
|
||||
{
|
||||
return *m_lock;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
std::uint64_t ConcurrentSet<T>::bufferSize() const
|
||||
{
|
||||
return sizeof(SizeType) + sizeof(T) * m_data.size();
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
bool ConcurrentSet<T>::save(std::ostream& output)
|
||||
{
|
||||
SizeType count = (SizeType)m_data.size();
|
||||
output.write((char*)&count, sizeof(SizeType));
|
||||
for (auto iter = m_data.begin(); iter != m_data.end(); iter++)
|
||||
output.write((char*)&(*iter), sizeof(T));
|
||||
std::cout << "Save DeleteID (" << count << ") Finish!" << std::endl;
|
||||
return true;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
bool ConcurrentSet<T>::save(std::string filename)
|
||||
{
|
||||
std::cout << "Save DeleteID To " << filename << std::endl;
|
||||
std::ofstream output(filename, std::ios::binary);
|
||||
if (!output.is_open()) return false;
|
||||
save(output);
|
||||
output.close();
|
||||
return true;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
bool ConcurrentSet<T>::load(std::string filename)
|
||||
{
|
||||
std::cout << "Load DeleteID From " << filename << std::endl;
|
||||
std::ifstream input(filename, std::ios::binary);
|
||||
if (!input.is_open()) return false;
|
||||
|
||||
SizeType count;
|
||||
T ID;
|
||||
input.read((char*)&count, sizeof(SizeType));
|
||||
for (SizeType i = 0; i < count; i++)
|
||||
{
|
||||
input.read((char*)&ID, sizeof(T));
|
||||
m_data.insert(ID);
|
||||
}
|
||||
input.close();
|
||||
std::cout << "Load DeleteID (" << count << ") Finish!" << std::endl;
|
||||
return true;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
bool ConcurrentSet<T>::load(char* pmemoryFile)
|
||||
{
|
||||
SizeType count;
|
||||
count = *((SizeType*)pmemoryFile);
|
||||
pmemoryFile += sizeof(SizeType);
|
||||
|
||||
m_data.insert((T*)pmemoryFile, ((T*)pmemoryFile) + count);
|
||||
pmemoryFile += sizeof(T) * count;
|
||||
std::cout << "Load DeleteID (" << count << ") Finish!" << std::endl;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif // _SPTAG_HELPER_CONCURRENTSET_H_
|
@ -31,6 +31,8 @@ public:
|
||||
|
||||
ErrorCode LoadIniFile(const std::string& p_iniFilePath);
|
||||
|
||||
ErrorCode LoadIni(std::istream& p_input);
|
||||
|
||||
bool DoesSectionExist(const std::string& p_section) const;
|
||||
|
||||
bool DoesParameterExist(const std::string& p_section, const std::string& p_param) const;
|
||||
|
59
core/src/index/thirdparty/SPTAG/AnnService/inc/Helper/VectorSetReader.h
vendored
Normal file
59
core/src/index/thirdparty/SPTAG/AnnService/inc/Helper/VectorSetReader.h
vendored
Normal file
@ -0,0 +1,59 @@
|
||||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
#ifndef _SPTAG_HELPER_VECTORSETREADER_H_
|
||||
#define _SPTAG_HELPER_VECTORSETREADER_H_
|
||||
|
||||
#include "inc/Core/Common.h"
|
||||
#include "inc/Core/VectorSet.h"
|
||||
#include "inc/Core/MetadataSet.h"
|
||||
#include "inc/Helper/ArgumentsParser.h"
|
||||
|
||||
#include <memory>
|
||||
|
||||
namespace SPTAG
|
||||
{
|
||||
namespace Helper
|
||||
{
|
||||
|
||||
class ReaderOptions : public ArgumentsParser
|
||||
{
|
||||
public:
|
||||
ReaderOptions(VectorValueType p_valueType, DimensionType p_dimension, std::string p_vectorDelimiter = "|", std::uint32_t p_threadNum = 32);
|
||||
|
||||
~ReaderOptions();
|
||||
|
||||
std::uint32_t m_threadNum;
|
||||
|
||||
DimensionType m_dimension;
|
||||
|
||||
std::string m_vectorDelimiter;
|
||||
|
||||
SPTAG::VectorValueType m_inputValueType;
|
||||
};
|
||||
|
||||
class VectorSetReader
|
||||
{
|
||||
public:
|
||||
VectorSetReader(std::shared_ptr<ReaderOptions> p_options);
|
||||
|
||||
virtual ~VectorSetReader();
|
||||
|
||||
virtual ErrorCode LoadFile(const std::string& p_filePath) = 0;
|
||||
|
||||
virtual std::shared_ptr<VectorSet> GetVectorSet() const = 0;
|
||||
|
||||
virtual std::shared_ptr<MetadataSet> GetMetadataSet() const = 0;
|
||||
|
||||
static std::shared_ptr<VectorSetReader> CreateInstance(std::shared_ptr<ReaderOptions> p_options);
|
||||
|
||||
protected:
|
||||
std::shared_ptr<ReaderOptions> m_options;
|
||||
};
|
||||
|
||||
|
||||
|
||||
} // namespace Helper
|
||||
} // namespace SPTAG
|
||||
|
||||
#endif // _SPTAG_HELPER_VECTORSETREADER_H_
|
@ -1,8 +1,8 @@
|
||||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
#ifndef _SPTAG_INDEXBUILDER_VECTORSETREADERS_DEFAULTREADER_H_
|
||||
#define _SPTAG_INDEXBUILDER_VECTORSETREADERS_DEFAULTREADER_H_
|
||||
#ifndef _SPTAG_HELPER_VECTORSETREADERS_DEFAULTREADER_H_
|
||||
#define _SPTAG_HELPER_VECTORSETREADERS_DEFAULTREADER_H_
|
||||
|
||||
#include "../VectorSetReader.h"
|
||||
#include "inc/Helper/Concurrent.h"
|
||||
@ -13,13 +13,13 @@
|
||||
|
||||
namespace SPTAG
|
||||
{
|
||||
namespace IndexBuilder
|
||||
namespace Helper
|
||||
{
|
||||
|
||||
class DefaultReader : public VectorSetReader
|
||||
{
|
||||
public:
|
||||
DefaultReader(std::shared_ptr<BuilderOptions> p_options);
|
||||
DefaultReader(std::shared_ptr<ReaderOptions> p_options);
|
||||
|
||||
virtual ~DefaultReader();
|
||||
|
||||
@ -44,7 +44,7 @@ private:
|
||||
template<typename DataType>
|
||||
bool TranslateVector(char* p_str, DataType* p_vector)
|
||||
{
|
||||
std::uint32_t eleCount = 0;
|
||||
DimensionType eleCount = 0;
|
||||
char* next = p_str;
|
||||
while ((*next) != '\0')
|
||||
{
|
||||
@ -85,11 +85,11 @@ private:
|
||||
|
||||
std::size_t m_subTaskBlocksize;
|
||||
|
||||
std::atomic<std::uint32_t> m_totalRecordCount;
|
||||
std::atomic<SizeType> m_totalRecordCount;
|
||||
|
||||
std::atomic<std::size_t> m_totalRecordVectorBytes;
|
||||
|
||||
std::vector<std::uint32_t> m_subTaskRecordCount;
|
||||
std::vector<SizeType> m_subTaskRecordCount;
|
||||
|
||||
std::string m_vectorOutput;
|
||||
|
||||
@ -102,7 +102,7 @@ private:
|
||||
|
||||
|
||||
|
||||
} // namespace IndexBuilder
|
||||
} // namespace Helper
|
||||
} // namespace SPTAG
|
||||
|
||||
#endif // _SPTAG_INDEXBUILDER_VECTORSETREADERS_DEFAULT_H_
|
||||
#endif // _SPTAG_HELPER_VECTORSETREADERS_DEFAULTREADER_H_
|
@ -5,7 +5,7 @@
|
||||
#define _SPTAG_INDEXBUILDER_OPTIONS_H_
|
||||
|
||||
#include "inc/Core/Common.h"
|
||||
#include "inc/Helper/ArgumentsParser.h"
|
||||
#include "inc/Helper/VectorSetReader.h"
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
@ -16,21 +16,13 @@ namespace SPTAG
|
||||
namespace IndexBuilder
|
||||
{
|
||||
|
||||
class BuilderOptions : public Helper::ArgumentsParser
|
||||
class BuilderOptions : public Helper::ReaderOptions
|
||||
{
|
||||
public:
|
||||
BuilderOptions();
|
||||
|
||||
~BuilderOptions();
|
||||
|
||||
std::uint32_t m_threadNum;
|
||||
|
||||
std::uint32_t m_dimension;
|
||||
|
||||
std::string m_vectorDelimiter;
|
||||
|
||||
SPTAG::VectorValueType m_inputValueType;
|
||||
|
||||
std::string m_inputFiles;
|
||||
|
||||
std::string m_outputFolder;
|
||||
|
@ -1,43 +0,0 @@
|
||||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
#ifndef _SPTAG_INDEXBUILDER_VECTORSETREADER_H_
|
||||
#define _SPTAG_INDEXBUILDER_VECTORSETREADER_H_
|
||||
|
||||
#include "inc/Core/Common.h"
|
||||
#include "inc/Core/VectorSet.h"
|
||||
#include "inc/Core/MetadataSet.h"
|
||||
#include "Options.h"
|
||||
|
||||
#include <memory>
|
||||
|
||||
namespace SPTAG
|
||||
{
|
||||
namespace IndexBuilder
|
||||
{
|
||||
|
||||
class VectorSetReader
|
||||
{
|
||||
public:
|
||||
VectorSetReader(std::shared_ptr<BuilderOptions> p_options);
|
||||
|
||||
virtual ~VectorSetReader();
|
||||
|
||||
virtual ErrorCode LoadFile(const std::string& p_filePath) = 0;
|
||||
|
||||
virtual std::shared_ptr<VectorSet> GetVectorSet() const = 0;
|
||||
|
||||
virtual std::shared_ptr<MetadataSet> GetMetadataSet() const = 0;
|
||||
|
||||
static std::shared_ptr<VectorSetReader> CreateInstance(std::shared_ptr<BuilderOptions> p_options);
|
||||
|
||||
protected:
|
||||
std::shared_ptr<BuilderOptions> m_options;
|
||||
};
|
||||
|
||||
|
||||
|
||||
} // namespace IndexBuilder
|
||||
} // namespace SPTAG
|
||||
|
||||
#endif // _SPTAG_INDEXBUILDER_VECTORSETREADER_H_
|
@ -7,6 +7,4 @@
|
||||
<package id="boost_system-vc140" version="1.67.0.0" targetFramework="native" />
|
||||
<package id="boost_thread-vc140" version="1.67.0.0" targetFramework="native" />
|
||||
<package id="boost_wserialization-vc140" version="1.67.0.0" targetFramework="native" />
|
||||
<package id="tbb_oss" version="9.107.0.0" targetFramework="native" />
|
||||
<package id="tbb_oss.redist" version="9.107.0.0" targetFramework="native" />
|
||||
</packages>
|
@ -53,19 +53,19 @@ int main(int argc, char** argv)
|
||||
|
||||
for (const auto& indexRes : result.m_allIndexResults)
|
||||
{
|
||||
fprintf(stdout, "Index: %s\n", indexRes.m_indexName.c_str());
|
||||
std::cout << "Index: " << indexRes.m_indexName << std::endl;
|
||||
|
||||
int idx = 0;
|
||||
for (const auto& res : indexRes.m_results)
|
||||
{
|
||||
fprintf(stdout, "------------------\n");
|
||||
fprintf(stdout, "DocIndex: %d Distance: %f\n", res.VID, res.Dist);
|
||||
std::cout << "------------------" << std::endl;
|
||||
std::cout << "DocIndex: " << res.VID << " Distance: " << res.Dist;
|
||||
if (indexRes.m_results.WithMeta())
|
||||
{
|
||||
const auto& metadata = indexRes.m_results.GetMetadata(idx);
|
||||
fprintf(stdout, " MetaData: %.*s\n", static_cast<int>(metadata.Length()), metadata.Data());
|
||||
std::cout << " MetaData: " << std::string((char*)metadata.Data(), metadata.Length());
|
||||
}
|
||||
|
||||
std::cout << std::endl;
|
||||
++idx;
|
||||
}
|
||||
}
|
||||
|
@ -13,22 +13,7 @@ namespace SPTAG
|
||||
namespace BKT
|
||||
{
|
||||
template <typename T>
|
||||
ErrorCode Index<T>::LoadIndexFromMemory(const std::vector<void*>& p_indexBlobs)
|
||||
{
|
||||
if (!m_pSamples.Load((char*)p_indexBlobs[0])) return ErrorCode::FailedParseValue;
|
||||
if (!m_pTrees.LoadTrees((char*)p_indexBlobs[1])) return ErrorCode::FailedParseValue;
|
||||
if (!m_pGraph.LoadGraphFromMemory((char*)p_indexBlobs[2])) return ErrorCode::FailedParseValue;
|
||||
m_pMetadata = std::make_shared<MemMetadataSet>();
|
||||
if (ErrorCode::Success != m_pMetadata->LoadMetadataFromMemory((char*)p_indexBlobs[3]))
|
||||
return ErrorCode::FailedParseValue;
|
||||
|
||||
m_workSpacePool.reset(new COMMON::WorkSpacePool(m_iMaxCheck, GetNumSamples()));
|
||||
m_workSpacePool->Init(m_iNumberOfThreads);
|
||||
return ErrorCode::Success;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
ErrorCode Index<T>::LoadIndex(const std::string& p_folderPath, Helper::IniReader& p_reader)
|
||||
ErrorCode Index<T>::LoadConfig(Helper::IniReader& p_reader)
|
||||
{
|
||||
#define DefineBKTParameter(VarName, VarType, DefaultValue, RepresentStr) \
|
||||
SetParameter(RepresentStr, \
|
||||
@ -38,34 +23,96 @@ namespace SPTAG
|
||||
|
||||
#include "inc/Core/BKT/ParameterDefinitionList.h"
|
||||
#undef DefineBKTParameter
|
||||
return ErrorCode::Success;
|
||||
}
|
||||
|
||||
if (!m_pSamples.Load(p_folderPath + m_sDataPointsFilename)) return ErrorCode::Fail;
|
||||
if (!m_pTrees.LoadTrees(p_folderPath + m_sBKTFilename)) return ErrorCode::Fail;
|
||||
if (!m_pGraph.LoadGraph(p_folderPath + m_sGraphFilename)) return ErrorCode::Fail;
|
||||
template <typename T>
|
||||
ErrorCode Index<T>::LoadIndexDataFromMemory(const std::vector<ByteArray>& p_indexBlobs)
|
||||
{
|
||||
if (p_indexBlobs.size() < 3) return ErrorCode::LackOfInputs;
|
||||
|
||||
if (!m_pSamples.Load((char*)p_indexBlobs[0].Data())) return ErrorCode::FailedParseValue;
|
||||
if (!m_pTrees.LoadTrees((char*)p_indexBlobs[1].Data())) return ErrorCode::FailedParseValue;
|
||||
if (!m_pGraph.LoadGraph((char*)p_indexBlobs[2].Data())) return ErrorCode::FailedParseValue;
|
||||
if (p_indexBlobs.size() > 3 && !m_deletedID.load((char*)p_indexBlobs[3].Data())) return ErrorCode::FailedParseValue;
|
||||
|
||||
m_workSpacePool.reset(new COMMON::WorkSpacePool(m_iMaxCheck, GetNumSamples()));
|
||||
m_workSpacePool->Init(m_iNumberOfThreads);
|
||||
return ErrorCode::Success;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
ErrorCode Index<T>::LoadIndexData(const std::string& p_folderPath)
|
||||
{
|
||||
if (!m_pSamples.Load(p_folderPath + m_sDataPointsFilename)) return ErrorCode::Fail;
|
||||
if (!m_pTrees.LoadTrees(p_folderPath + m_sBKTFilename)) return ErrorCode::Fail;
|
||||
if (!m_pGraph.LoadGraph(p_folderPath + m_sGraphFilename)) return ErrorCode::Fail;
|
||||
if (!m_deletedID.load(p_folderPath + m_sDeleteDataPointsFilename)) return ErrorCode::Fail;
|
||||
|
||||
m_workSpacePool.reset(new COMMON::WorkSpacePool(m_iMaxCheck, GetNumSamples()));
|
||||
m_workSpacePool->Init(m_iNumberOfThreads);
|
||||
return ErrorCode::Success;
|
||||
}
|
||||
|
||||
// Writes the BKT parameters to p_configOut, one "name=value" line per
// parameter, followed by an empty line. The parameter list comes from
// ParameterDefinitionList.h, expanded through the temporary
// DefineBKTParameter macro, which is undefined again right after the include.
// Always returns ErrorCode::Success.
template <typename T>
|
||||
ErrorCode Index<T>::SaveConfig(std::ostream& p_configOut) const
|
||||
{
|
||||
#define DefineBKTParameter(VarName, VarType, DefaultValue, RepresentStr) \
|
||||
p_configOut << RepresentStr << "=" << GetParameter(RepresentStr) << std::endl;
|
||||
|
||||
#include "inc/Core/BKT/ParameterDefinitionList.h"
|
||||
#undef DefineBKTParameter
|
||||
p_configOut << std::endl;
|
||||
return ErrorCode::Success;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
ErrorCode
|
||||
Index<T>::SaveIndexData(const std::string& p_folderPath)
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(m_dataAddLock);
|
||||
std::shared_lock<std::shared_timed_mutex> sharedlock(m_deletedID.getLock());
|
||||
|
||||
if (!m_pSamples.Save(p_folderPath + m_sDataPointsFilename)) return ErrorCode::Fail;
|
||||
if (!m_pTrees.SaveTrees(p_folderPath + m_sBKTFilename)) return ErrorCode::Fail;
|
||||
if (!m_pGraph.SaveGraph(p_folderPath + m_sGraphFilename)) return ErrorCode::Fail;
|
||||
if (!m_deletedID.save(p_folderPath + m_sDeleteDataPointsFilename)) return ErrorCode::Fail;
|
||||
return ErrorCode::Success;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
ErrorCode Index<T>::SaveIndexData(const std::vector<std::ostream*>& p_indexStreams)
|
||||
{
|
||||
if (p_indexStreams.size() < 4) return ErrorCode::LackOfInputs;
|
||||
|
||||
std::lock_guard<std::mutex> lock(m_dataAddLock);
|
||||
std::shared_lock<std::shared_timed_mutex> sharedlock(m_deletedID.getLock());
|
||||
|
||||
if (!m_pSamples.Save(*p_indexStreams[0])) return ErrorCode::Fail;
|
||||
if (!m_pTrees.SaveTrees(*p_indexStreams[1])) return ErrorCode::Fail;
|
||||
if (!m_pGraph.SaveGraph(*p_indexStreams[2])) return ErrorCode::Fail;
|
||||
if (!m_deletedID.save(*p_indexStreams[3])) return ErrorCode::Fail;
|
||||
return ErrorCode::Success;
|
||||
}
|
||||
|
||||
#pragma region K-NN search
|
||||
|
||||
#define Search(CheckDeleted1) \
|
||||
m_pTrees.InitSearchTrees(this, p_query, p_space); \
|
||||
const int checkPos = m_pGraph.m_iNeighborhoodSize - 1; \
|
||||
const DimensionType checkPos = m_pGraph.m_iNeighborhoodSize - 1; \
|
||||
while (!p_space.m_SPTQueue.empty()) { \
|
||||
m_pTrees.SearchTrees(this, p_query, p_space, m_iNumberOfOtherDynamicPivots + p_space.m_iNumberOfCheckedLeaves); \
|
||||
while (!p_space.m_NGQueue.empty()) { \
|
||||
COMMON::HeapCell gnode = p_space.m_NGQueue.pop(); \
|
||||
const int *node = m_pGraph[gnode.node]; \
|
||||
const SizeType *node = m_pGraph[gnode.node]; \
|
||||
_mm_prefetch((const char *)node, _MM_HINT_T0); \
|
||||
CheckDeleted1 { \
|
||||
if (p_query.AddPoint(gnode.node, gnode.distance)) { \
|
||||
p_space.m_iNumOfContinuousNoBetterPropagation = 0; \
|
||||
int checkNode = node[checkPos]; \
|
||||
SizeType checkNode = node[checkPos]; \
|
||||
if (checkNode < -1) { \
|
||||
const COMMON::BKTNode& tnode = m_pTrees[-2 - checkNode]; \
|
||||
for (int i = -tnode.childStart; i < tnode.childEnd; i++) { \
|
||||
for (SizeType i = -tnode.childStart; i < tnode.childEnd; i++) { \
|
||||
if (!p_query.AddPoint(m_pTrees[i].centerid, gnode.distance)) break; \
|
||||
} \
|
||||
} \
|
||||
@ -77,11 +124,11 @@ namespace SPTAG
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
for (int i = 0; i <= checkPos; i++) { \
|
||||
for (DimensionType i = 0; i <= checkPos; i++) { \
|
||||
_mm_prefetch((const char *)(m_pSamples)[node[i]], _MM_HINT_T0); \
|
||||
} \
|
||||
for (int i = 0; i <= checkPos; i++) { \
|
||||
int nn_index = node[i]; \
|
||||
for (DimensionType i = 0; i <= checkPos; i++) { \
|
||||
SizeType nn_index = node[i]; \
|
||||
if (nn_index < 0) break; \
|
||||
if (p_space.CheckAndSet(nn_index)) continue; \
|
||||
float distance2leaf = m_fComputeDistance(p_query.GetTarget(), (m_pSamples)[nn_index], GetFeatureDim()); \
|
||||
@ -96,9 +143,9 @@ namespace SPTAG
|
||||
p_query.SortResult(); \
|
||||
|
||||
template <typename T>
|
||||
void Index<T>::SearchIndexWithDeleted(COMMON::QueryResultSet<T> &p_query, COMMON::WorkSpace &p_space, const tbb::concurrent_unordered_set<int> &p_deleted) const
|
||||
void Index<T>::SearchIndexWithDeleted(COMMON::QueryResultSet<T> &p_query, COMMON::WorkSpace &p_space, const Helper::Concurrent::ConcurrentSet<SizeType> &p_deleted) const
|
||||
{
|
||||
Search(if (p_deleted.find(gnode.node) == p_deleted.end()))
|
||||
Search(if (!p_deleted.contains(gnode.node)))
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
@ -125,7 +172,7 @@ namespace SPTAG
|
||||
{
|
||||
for (int i = 0; i < p_query.GetResultNum(); ++i)
|
||||
{
|
||||
int result = p_query.GetResult(i)->VID;
|
||||
SizeType result = p_query.GetResult(i)->VID;
|
||||
p_query.SetMetadata(i, (result < 0) ? ByteArray::c_empty : m_pMetadata->GetMetadata(result));
|
||||
}
|
||||
}
|
||||
@ -134,7 +181,7 @@ namespace SPTAG
|
||||
#pragma endregion
|
||||
|
||||
template <typename T>
|
||||
ErrorCode Index<T>::BuildIndex(const void* p_data, int p_vectorNum, int p_dimension)
|
||||
ErrorCode Index<T>::BuildIndex(const void* p_data, SizeType p_vectorNum, DimensionType p_dimension)
|
||||
{
|
||||
omp_set_num_threads(m_iNumberOfThreads);
|
||||
|
||||
@ -144,20 +191,64 @@ namespace SPTAG
|
||||
{
|
||||
int base = COMMON::Utils::GetBase<T>();
|
||||
#pragma omp parallel for
|
||||
for (int i = 0; i < GetNumSamples(); i++) {
|
||||
for (SizeType i = 0; i < GetNumSamples(); i++) {
|
||||
COMMON::Utils::Normalize(m_pSamples[i], GetFeatureDim(), base);
|
||||
}
|
||||
}
|
||||
|
||||
m_workSpacePool.reset(new COMMON::WorkSpacePool(m_iMaxCheck, GetNumSamples()));
|
||||
m_workSpacePool->Init(m_iNumberOfThreads);
|
||||
|
||||
|
||||
m_pTrees.BuildTrees<T>(this);
|
||||
m_pGraph.BuildGraph<T>(this, &(m_pTrees.GetSampleMap()));
|
||||
|
||||
return ErrorCode::Success;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
ErrorCode Index<T>::RefineIndex(const std::vector<std::ostream*>& p_indexStreams)
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(m_dataAddLock);
|
||||
std::shared_lock<std::shared_timed_mutex> sharedlock(m_deletedID.getLock());
|
||||
|
||||
SizeType newR = GetNumSamples();
|
||||
|
||||
std::vector<SizeType> indices;
|
||||
std::vector<SizeType> reverseIndices(newR);
|
||||
for (SizeType i = 0; i < newR; i++) {
|
||||
if (!m_deletedID.contains(i)) {
|
||||
indices.push_back(i);
|
||||
reverseIndices[i] = i;
|
||||
}
|
||||
else {
|
||||
while (m_deletedID.contains(newR - 1) && newR > i) newR--;
|
||||
if (newR == i) break;
|
||||
indices.push_back(newR - 1);
|
||||
reverseIndices[newR - 1] = i;
|
||||
newR--;
|
||||
}
|
||||
}
|
||||
|
||||
std::cout << "Refine... from " << GetNumSamples() << "->" << newR << std::endl;
|
||||
|
||||
if (false == m_pSamples.Refine(indices, *p_indexStreams[0])) return ErrorCode::Fail;
|
||||
if (nullptr != m_pMetadata && (p_indexStreams.size() < 6 || ErrorCode::Success != m_pMetadata->RefineMetadata(indices, *p_indexStreams[4], *p_indexStreams[5]))) return ErrorCode::Fail;
|
||||
|
||||
COMMON::BKTree newTrees(m_pTrees);
|
||||
newTrees.BuildTrees<T>(this, &indices);
|
||||
#pragma omp parallel for
|
||||
for (SizeType i = 0; i < newTrees.size(); i++) {
|
||||
newTrees[i].centerid = reverseIndices[newTrees[i].centerid];
|
||||
}
|
||||
newTrees.SaveTrees(*p_indexStreams[1]);
|
||||
|
||||
m_pGraph.RefineGraph<T>(this, indices, reverseIndices, *p_indexStreams[2], &(newTrees.GetSampleMap()));
|
||||
|
||||
Helper::Concurrent::ConcurrentSet<SizeType> newDeletedID;
|
||||
newDeletedID.save(*p_indexStreams[3]);
|
||||
return ErrorCode::Success;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
ErrorCode Index<T>::RefineIndex(const std::string& p_folderPath)
|
||||
{
|
||||
@ -172,54 +263,40 @@ namespace SPTAG
|
||||
mkdir(folderPath.c_str());
|
||||
}
|
||||
|
||||
std::lock_guard<std::mutex> lock(m_dataLock);
|
||||
int newR = GetNumSamples();
|
||||
|
||||
std::vector<int> indices;
|
||||
std::vector<int> reverseIndices(newR);
|
||||
for (int i = 0; i < newR; i++) {
|
||||
if (m_deletedID.find(i) == m_deletedID.end()) {
|
||||
indices.push_back(i);
|
||||
reverseIndices[i] = i;
|
||||
}
|
||||
else {
|
||||
while (m_deletedID.find(newR - 1) != m_deletedID.end() && newR > i) newR--;
|
||||
if (newR == i) break;
|
||||
indices.push_back(newR - 1);
|
||||
reverseIndices[newR - 1] = i;
|
||||
newR--;
|
||||
}
|
||||
std::vector<std::ostream*> streams;
|
||||
streams.push_back(new std::ofstream(folderPath + m_sDataPointsFilename, std::ios::binary));
|
||||
streams.push_back(new std::ofstream(folderPath + m_sBKTFilename, std::ios::binary));
|
||||
streams.push_back(new std::ofstream(folderPath + m_sGraphFilename, std::ios::binary));
|
||||
streams.push_back(new std::ofstream(folderPath + m_sDeleteDataPointsFilename, std::ios::binary));
|
||||
if (nullptr != m_pMetadata)
|
||||
{
|
||||
streams.push_back(new std::ofstream(folderPath + m_sMetadataFile, std::ios::binary));
|
||||
streams.push_back(new std::ofstream(folderPath + m_sMetadataIndexFile, std::ios::binary));
|
||||
}
|
||||
|
||||
std::cout << "Refine... from " << GetNumSamples() << "->" << newR << std::endl;
|
||||
for (size_t i = 0; i < streams.size(); i++)
|
||||
if (!(((std::ofstream*)streams[i])->is_open())) return ErrorCode::FailedCreateFile;
|
||||
|
||||
if (false == m_pSamples.Refine(indices, folderPath + m_sDataPointsFilename)) return ErrorCode::FailedCreateFile;
|
||||
if (nullptr != m_pMetadata && ErrorCode::Success != m_pMetadata->RefineMetadata(indices, folderPath)) return ErrorCode::FailedCreateFile;
|
||||
ErrorCode ret = RefineIndex(streams);
|
||||
|
||||
COMMON::BKTree newTrees(m_pTrees);
|
||||
newTrees.BuildTrees<T>(this, &indices);
|
||||
#pragma omp parallel for
|
||||
for (int i = 0; i < newTrees.size(); i++) {
|
||||
newTrees[i].centerid = reverseIndices[newTrees[i].centerid];
|
||||
for (size_t i = 0; i < streams.size(); i++)
|
||||
{
|
||||
((std::ofstream*)streams[i])->close();
|
||||
delete streams[i];
|
||||
}
|
||||
newTrees.SaveTrees(folderPath + m_sBKTFilename);
|
||||
|
||||
m_pGraph.RefineGraph<T>(this, indices, reverseIndices, folderPath + m_sGraphFilename,
|
||||
&(newTrees.GetSampleMap()));
|
||||
return ErrorCode::Success;
|
||||
return ret;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
ErrorCode Index<T>::DeleteIndex(const void* p_vectors, int p_vectorNum) {
|
||||
ErrorCode Index<T>::DeleteIndex(const void* p_vectors, SizeType p_vectorNum) {
|
||||
const T* ptr_v = (const T*)p_vectors;
|
||||
#pragma omp parallel for schedule(dynamic)
|
||||
for (int i = 0; i < p_vectorNum; i++) {
|
||||
for (SizeType i = 0; i < p_vectorNum; i++) {
|
||||
COMMON::QueryResultSet<T> query(ptr_v + i * GetFeatureDim(), m_pGraph.m_iCEF);
|
||||
SearchIndex(query);
|
||||
|
||||
for (int i = 0; i < m_pGraph.m_iCEF; i++) {
|
||||
if (query.GetResult(i)->Dist < 1e-6) {
|
||||
std::lock_guard<std::mutex> lock(m_dataLock);
|
||||
m_deletedID.insert(query.GetResult(i)->VID);
|
||||
}
|
||||
}
|
||||
@ -228,40 +305,43 @@ namespace SPTAG
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
ErrorCode Index<T>::AddIndex(const void* p_vectors, int p_vectorNum, int p_dimension)
|
||||
ErrorCode Index<T>::DeleteIndex(const SizeType& p_id) {
|
||||
m_deletedID.insert(p_id);
|
||||
return ErrorCode::Success;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
ErrorCode Index<T>::AddIndex(const void* p_vectors, SizeType p_vectorNum, DimensionType p_dimension, SizeType* p_start)
|
||||
{
|
||||
int begin, end;
|
||||
SizeType begin, end;
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(m_dataLock);
|
||||
|
||||
if (GetNumSamples() == 0)
|
||||
return BuildIndex(p_vectors, p_vectorNum, p_dimension);
|
||||
|
||||
if (p_dimension != GetFeatureDim())
|
||||
return ErrorCode::FailedParseValue;
|
||||
std::lock_guard<std::mutex> lock(m_dataAddLock);
|
||||
|
||||
begin = GetNumSamples();
|
||||
end = GetNumSamples() + p_vectorNum;
|
||||
|
||||
m_pSamples.AddBatch((const T*)p_vectors, p_vectorNum);
|
||||
m_pGraph.AddBatch(p_vectorNum);
|
||||
if (p_start != nullptr) *p_start = begin;
|
||||
|
||||
if (begin == 0) return BuildIndex(p_vectors, p_vectorNum, p_dimension);
|
||||
|
||||
if (m_pSamples.R() != end || m_pGraph.R() != end) {
|
||||
if (p_dimension != GetFeatureDim()) return ErrorCode::FailedParseValue;
|
||||
|
||||
if (m_pSamples.AddBatch((const T*)p_vectors, p_vectorNum) != ErrorCode::Success || m_pGraph.AddBatch(p_vectorNum) != ErrorCode::Success) {
|
||||
std::cout << "Memory Error: Cannot alloc space for vectors" << std::endl;
|
||||
m_pSamples.SetR(begin);
|
||||
m_pGraph.SetR(begin);
|
||||
return ErrorCode::Fail;
|
||||
return ErrorCode::MemoryOverFlow;
|
||||
}
|
||||
if (DistCalcMethod::Cosine == m_iDistCalcMethod)
|
||||
{
|
||||
int base = COMMON::Utils::GetBase<T>();
|
||||
for (int i = begin; i < end; i++) {
|
||||
for (SizeType i = begin; i < end; i++) {
|
||||
COMMON::Utils::Normalize((T*)m_pSamples[i], GetFeatureDim(), base);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (int node = begin; node < end; node++)
|
||||
for (SizeType node = begin; node < end; node++)
|
||||
{
|
||||
m_pGraph.RefineNode<T>(this, node, true);
|
||||
}
|
||||
@ -269,47 +349,6 @@ namespace SPTAG
|
||||
return ErrorCode::Success;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
ErrorCode
|
||||
Index<T>::SaveIndexToMemory(std::vector<void*>& p_indexBlobs, std::vector<int64_t> &p_indexBlobsLen)
|
||||
{
|
||||
p_indexBlobs.resize(4);
|
||||
p_indexBlobsLen.resize(4);
|
||||
if (!m_pSamples.Save(&p_indexBlobs[0], p_indexBlobsLen[0])) return ErrorCode::Fail;
|
||||
if (!m_pTrees.SaveTrees(&p_indexBlobs[1], p_indexBlobsLen[1])) return ErrorCode::Fail;
|
||||
if (!m_pGraph.SaveGraphToMemory(&p_indexBlobs[2], p_indexBlobsLen[2])) return ErrorCode::Fail;
|
||||
if (ErrorCode::Success != m_pMetadata->SaveMetadataToMemory(&p_indexBlobs[3], p_indexBlobsLen[3]))
|
||||
return ErrorCode::Fail;
|
||||
return ErrorCode::Success;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
ErrorCode
|
||||
Index<T>::SaveIndex(const std::string& p_folderPath, std::ofstream& p_configout)
|
||||
{
|
||||
m_sDataPointsFilename = "vectors.bin";
|
||||
m_sBKTFilename = "tree.bin";
|
||||
m_sGraphFilename = "graph.bin";
|
||||
|
||||
#define DefineBKTParameter(VarName, VarType, DefaultValue, RepresentStr) \
|
||||
p_configout << RepresentStr << "=" << GetParameter(RepresentStr) << std::endl;
|
||||
|
||||
#include "inc/Core/BKT/ParameterDefinitionList.h"
|
||||
#undef DefineBKTParameter
|
||||
|
||||
p_configout << std::endl;
|
||||
|
||||
if (m_deletedID.size() > 0) {
|
||||
RefineIndex(p_folderPath);
|
||||
}
|
||||
else {
|
||||
if (!m_pSamples.Save(p_folderPath + m_sDataPointsFilename)) return ErrorCode::Fail;
|
||||
if (!m_pTrees.SaveTrees(p_folderPath + m_sBKTFilename)) return ErrorCode::Fail;
|
||||
if (!m_pGraph.SaveGraph(p_folderPath + m_sGraphFilename)) return ErrorCode::Fail;
|
||||
}
|
||||
return ErrorCode::Success;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
ErrorCode
|
||||
Index<T>::SetParameter(const char* p_param, const char* p_value)
|
||||
|
@ -7,7 +7,7 @@ using namespace SPTAG;
|
||||
using namespace SPTAG::COMMON;
|
||||
|
||||
|
||||
WorkSpacePool::WorkSpacePool(int p_maxCheck, int p_vectorCount)
|
||||
WorkSpacePool::WorkSpacePool(int p_maxCheck, SizeType p_vectorCount)
|
||||
: m_maxCheck(p_maxCheck),
|
||||
m_vectorCount(p_vectorCount)
|
||||
{
|
||||
|
@ -1,132 +0,0 @@
|
||||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
#include "inc/Core/CommonDataStructure.h"
|
||||
|
||||
using namespace SPTAG;
|
||||
|
||||
const ByteArray ByteArray::c_empty;
|
||||
|
||||
// Builds an empty array: null data pointer and zero length.
ByteArray::ByteArray() : m_data(nullptr), m_length(0) {}
|
||||
|
||||
|
||||
// Move constructor: takes over p_right's data pointer, length and holder.
//
// The source is reset to the empty state. Previously it kept its raw pointer and length
// after its holder had been moved away, so it could expose a dangling pointer once the
// new owner released the buffer.
ByteArray::ByteArray(ByteArray&& p_right)
    : m_data(p_right.m_data),
      m_length(p_right.m_length),
      m_dataHolder(std::move(p_right.m_dataHolder))
{
    p_right.m_data = nullptr;
    p_right.m_length = 0;
}
|
||||
|
||||
|
||||
// Wraps an existing buffer.
//
// p_array              first byte of the buffer.
// p_length             number of bytes in the buffer.
// p_transferOnwership  when true the buffer is handed to the internal holder, which
//                      releases it with delete[]; when false no holder is set.
ByteArray::ByteArray(std::uint8_t* p_array, std::size_t p_length, bool p_transferOnwership)
    : m_data(p_array), m_length(p_length)
{
    if (!p_transferOnwership) return;

    m_dataHolder.reset(m_data, std::default_delete<std::uint8_t[]>());
}
|
||||
|
||||
|
||||
// Wraps p_length bytes starting at p_array and stores p_dataHolder alongside them.
// The holder is taken by value and moved into the member.
ByteArray::ByteArray(std::uint8_t* p_array, std::size_t p_length, std::shared_ptr<std::uint8_t> p_dataHolder)
    : m_data(p_array), m_length(p_length), m_dataHolder(std::move(p_dataHolder))
{}
|
||||
|
||||
|
||||
// Copy constructor: the copy refers to the same bytes as p_right (pointer, length and
// holder are copied; the bytes themselves are not duplicated).
ByteArray::ByteArray(const ByteArray& p_right)
    : m_data(p_right.m_data), m_length(p_right.m_length), m_dataHolder(p_right.m_dataHolder)
{}
|
||||
|
||||
|
||||
// Copy assignment: makes this array refer to the same bytes as p_right by copying its
// holder, data pointer and length. Returns *this.
ByteArray& ByteArray::operator= (const ByteArray& p_right)
{
    m_dataHolder = p_right.m_dataHolder;
    m_length = p_right.m_length;
    m_data = p_right.m_data;
    return *this;
}
|
||||
|
||||
|
||||
// Move assignment: takes over p_right's data pointer, length and holder. Returns *this.
//
// Self-assignment is a no-op. The source is reset to the empty state so that it does not
// keep a raw pointer to a buffer whose holder it has given away.
ByteArray&
ByteArray::operator= (ByteArray&& p_right)
{
    if (this != &p_right)
    {
        m_data = p_right.m_data;
        m_length = p_right.m_length;
        m_dataHolder = std::move(p_right.m_dataHolder);

        p_right.m_data = nullptr;
        p_right.m_length = 0;
    }

    return *this;
}
|
||||
|
||||
|
||||
// Nothing to release by hand: the members clean up after themselves.
ByteArray::~ByteArray() = default;
|
||||
|
||||
|
||||
// Allocates a new buffer of p_length bytes and returns an array that owns it.
// The bytes are not initialised. A request for zero bytes yields an empty array.
ByteArray ByteArray::Alloc(std::size_t p_length)
{
    ByteArray result;
    if (p_length != 0)
    {
        // The holder frees the block with delete[] once the last reference goes away.
        std::shared_ptr<std::uint8_t> buffer(new std::uint8_t[p_length],
                                             std::default_delete<std::uint8_t[]>());
        result.m_data = buffer.get();
        result.m_length = p_length;
        result.m_dataHolder = std::move(buffer);
    }
    return result;
}
|
||||
|
||||
|
||||
std::uint8_t*
|
||||
ByteArray::Data() const
|
||||
{
|
||||
return m_data;
|
||||
}
|
||||
|
||||
|
||||
std::size_t
|
||||
ByteArray::Length() const
|
||||
{
|
||||
return m_length;
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
ByteArray::SetData(std::uint8_t* p_array, std::size_t p_length)
|
||||
{
|
||||
m_data = p_array;
|
||||
m_length = p_length;
|
||||
}
|
||||
|
||||
|
||||
std::shared_ptr<std::uint8_t>
|
||||
ByteArray::DataHolder() const
|
||||
{
|
||||
return m_dataHolder;
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
ByteArray::Clear()
|
||||
{
|
||||
m_data = nullptr;
|
||||
m_dataHolder.reset();
|
||||
m_length = 0;
|
||||
}
|
@ -13,22 +13,7 @@ namespace SPTAG
|
||||
namespace KDT
|
||||
{
|
||||
template <typename T>
|
||||
ErrorCode Index<T>::LoadIndexFromMemory(const std::vector<void*>& p_indexBlobs)
|
||||
{
|
||||
if (!m_pSamples.Load((char*)p_indexBlobs[0])) return ErrorCode::FailedParseValue;
|
||||
if (!m_pTrees.LoadTrees((char*)p_indexBlobs[1])) return ErrorCode::FailedParseValue;
|
||||
if (!m_pGraph.LoadGraphFromMemory((char*)p_indexBlobs[2])) return ErrorCode::FailedParseValue;
|
||||
m_pMetadata = std::make_shared<MemMetadataSet>();
|
||||
if (ErrorCode::Success != m_pMetadata->LoadMetadataFromMemory((char*)p_indexBlobs[3]))
|
||||
return ErrorCode::FailedParseValue;
|
||||
|
||||
m_workSpacePool.reset(new COMMON::WorkSpacePool(m_iMaxCheck, GetNumSamples()));
|
||||
m_workSpacePool->Init(m_iNumberOfThreads);
|
||||
return ErrorCode::Success;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
ErrorCode Index<T>::LoadIndex(const std::string& p_folderPath, Helper::IniReader& p_reader)
|
||||
ErrorCode Index<T>::LoadConfig(Helper::IniReader& p_reader)
|
||||
{
|
||||
#define DefineKDTParameter(VarName, VarType, DefaultValue, RepresentStr) \
|
||||
SetParameter(RepresentStr, \
|
||||
@ -38,35 +23,96 @@ namespace SPTAG
|
||||
|
||||
#include "inc/Core/KDT/ParameterDefinitionList.h"
|
||||
#undef DefineKDTParameter
|
||||
return ErrorCode::Success;
|
||||
}
|
||||
|
||||
if (!m_pSamples.Load(p_folderPath + m_sDataPointsFilename)) return ErrorCode::Fail;
|
||||
if (!m_pTrees.LoadTrees(p_folderPath + m_sKDTFilename)) return ErrorCode::Fail;
|
||||
if (!m_pGraph.LoadGraph(p_folderPath + m_sGraphFilename)) return ErrorCode::Fail;
|
||||
template <typename T>
|
||||
ErrorCode Index<T>::LoadIndexDataFromMemory(const std::vector<ByteArray>& p_indexBlobs)
|
||||
{
|
||||
if (p_indexBlobs.size() < 3) return ErrorCode::LackOfInputs;
|
||||
|
||||
if (!m_pSamples.Load((char*)p_indexBlobs[0].Data())) return ErrorCode::FailedParseValue;
|
||||
if (!m_pTrees.LoadTrees((char*)p_indexBlobs[1].Data())) return ErrorCode::FailedParseValue;
|
||||
if (!m_pGraph.LoadGraph((char*)p_indexBlobs[2].Data())) return ErrorCode::FailedParseValue;
|
||||
if (p_indexBlobs.size() > 3 && !m_deletedID.load((char*)p_indexBlobs[3].Data())) return ErrorCode::FailedParseValue;
|
||||
|
||||
m_workSpacePool.reset(new COMMON::WorkSpacePool(m_iMaxCheck, GetNumSamples()));
|
||||
m_workSpacePool->Init(m_iNumberOfThreads);
|
||||
return ErrorCode::Success;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
ErrorCode Index<T>::LoadIndexData(const std::string& p_folderPath)
|
||||
{
|
||||
if (!m_pSamples.Load(p_folderPath + m_sDataPointsFilename)) return ErrorCode::Fail;
|
||||
if (!m_pTrees.LoadTrees(p_folderPath + m_sKDTFilename)) return ErrorCode::Fail;
|
||||
if (!m_pGraph.LoadGraph(p_folderPath + m_sGraphFilename)) return ErrorCode::Fail;
|
||||
if (!m_deletedID.load(p_folderPath + m_sDeleteDataPointsFilename)) return ErrorCode::Fail;
|
||||
|
||||
m_workSpacePool.reset(new COMMON::WorkSpacePool(m_iMaxCheck, GetNumSamples()));
|
||||
m_workSpacePool->Init(m_iNumberOfThreads);
|
||||
return ErrorCode::Success;
|
||||
}
|
||||
|
||||
// Writes the KDT configuration to p_configOut and always returns ErrorCode::Success.
//
// Each DefineKDTParameter invocation in the included list becomes one
// "<RepresentStr>=<GetParameter(RepresentStr)>" line; an empty line terminates the section.
template<typename T>
ErrorCode Index<T>::SaveConfig(std::ostream& p_configOut) const
{
    // Temporarily define the list macro as "print this parameter", then pull in the list.
#define DefineKDTParameter(VarName, VarType, DefaultValue, RepresentStr) \
    p_configOut << RepresentStr << "=" << GetParameter(RepresentStr) << std::endl;

#include "inc/Core/KDT/ParameterDefinitionList.h"
#undef DefineKDTParameter

    p_configOut << std::endl;
    return ErrorCode::Success;
}
|
||||
|
||||
template<typename T>
|
||||
ErrorCode Index<T>::SaveIndexData(const std::string& p_folderPath)
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(m_dataAddLock);
|
||||
std::shared_lock<std::shared_timed_mutex> sharedlock(m_deletedID.getLock());
|
||||
|
||||
if (!m_pSamples.Save(p_folderPath + m_sDataPointsFilename)) return ErrorCode::Fail;
|
||||
if (!m_pTrees.SaveTrees(p_folderPath + m_sKDTFilename)) return ErrorCode::Fail;
|
||||
if (!m_pGraph.SaveGraph(p_folderPath + m_sGraphFilename)) return ErrorCode::Fail;
|
||||
if (!m_deletedID.save(p_folderPath + m_sDeleteDataPointsFilename)) return ErrorCode::Fail;
|
||||
return ErrorCode::Success;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
ErrorCode Index<T>::SaveIndexData(const std::vector<std::ostream*>& p_indexStreams)
|
||||
{
|
||||
if (p_indexStreams.size() < 4) return ErrorCode::LackOfInputs;
|
||||
|
||||
std::lock_guard<std::mutex> lock(m_dataAddLock);
|
||||
std::shared_lock<std::shared_timed_mutex> sharedlock(m_deletedID.getLock());
|
||||
|
||||
if (!m_pSamples.Save(*p_indexStreams[0])) return ErrorCode::Fail;
|
||||
if (!m_pTrees.SaveTrees(*p_indexStreams[1])) return ErrorCode::Fail;
|
||||
if (!m_pGraph.SaveGraph(*p_indexStreams[2])) return ErrorCode::Fail;
|
||||
if (!m_deletedID.save(*p_indexStreams[3])) return ErrorCode::Fail;
|
||||
return ErrorCode::Success;
|
||||
}
|
||||
|
||||
#pragma region K-NN search
|
||||
|
||||
#define Search(CheckDeleted1) \
|
||||
m_pTrees.InitSearchTrees(this, p_query, p_space, m_iNumberOfInitialDynamicPivots); \
|
||||
while (!p_space.m_NGQueue.empty()) { \
|
||||
COMMON::HeapCell gnode = p_space.m_NGQueue.pop(); \
|
||||
const int *node = m_pGraph[gnode.node]; \
|
||||
const SizeType *node = m_pGraph[gnode.node]; \
|
||||
_mm_prefetch((const char *)node, _MM_HINT_T0); \
|
||||
CheckDeleted1 { \
|
||||
if (!p_query.AddPoint(gnode.node, gnode.distance) && p_space.m_iNumberOfCheckedLeaves > p_space.m_iMaxCheck) { \
|
||||
p_query.SortResult(); return; \
|
||||
} \
|
||||
} \
|
||||
for (int i = 0; i < m_pGraph.m_iNeighborhoodSize; i++) \
|
||||
for (DimensionType i = 0; i < m_pGraph.m_iNeighborhoodSize; i++) \
|
||||
_mm_prefetch((const char *)(m_pSamples)[node[i]], _MM_HINT_T0); \
|
||||
float upperBound = max(p_query.worstDist(), gnode.distance); \
|
||||
bool bLocalOpt = true; \
|
||||
for (int i = 0; i < m_pGraph.m_iNeighborhoodSize; i++) { \
|
||||
int nn_index = node[i]; \
|
||||
for (DimensionType i = 0; i < m_pGraph.m_iNeighborhoodSize; i++) { \
|
||||
SizeType nn_index = node[i]; \
|
||||
if (nn_index < 0) break; \
|
||||
if (p_space.CheckAndSet(nn_index)) continue; \
|
||||
float distance2leaf = m_fComputeDistance(p_query.GetTarget(), (m_pSamples)[nn_index], GetFeatureDim()); \
|
||||
@ -87,9 +133,9 @@ namespace SPTAG
|
||||
p_query.SortResult(); \
|
||||
|
||||
template <typename T>
|
||||
void Index<T>::SearchIndexWithDeleted(COMMON::QueryResultSet<T> &p_query, COMMON::WorkSpace &p_space, const tbb::concurrent_unordered_set<int> &p_deleted) const
|
||||
void Index<T>::SearchIndexWithDeleted(COMMON::QueryResultSet<T> &p_query, COMMON::WorkSpace &p_space, const Helper::Concurrent::ConcurrentSet<SizeType> &p_deleted) const
|
||||
{
|
||||
Search(if (p_deleted.find(gnode.node) == p_deleted.end()))
|
||||
Search(if (!p_deleted.contains(gnode.node)))
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
@ -116,7 +162,7 @@ namespace SPTAG
|
||||
{
|
||||
for (int i = 0; i < p_query.GetResultNum(); ++i)
|
||||
{
|
||||
int result = p_query.GetResult(i)->VID;
|
||||
SizeType result = p_query.GetResult(i)->VID;
|
||||
p_query.SetMetadata(i, (result < 0) ? ByteArray::c_empty : m_pMetadata->GetMetadata(result));
|
||||
}
|
||||
}
|
||||
@ -125,7 +171,7 @@ namespace SPTAG
|
||||
#pragma endregion
|
||||
|
||||
template <typename T>
|
||||
ErrorCode Index<T>::BuildIndex(const void* p_data, int p_vectorNum, int p_dimension)
|
||||
ErrorCode Index<T>::BuildIndex(const void* p_data, SizeType p_vectorNum, DimensionType p_dimension)
|
||||
{
|
||||
omp_set_num_threads(m_iNumberOfThreads);
|
||||
|
||||
@ -135,7 +181,7 @@ namespace SPTAG
|
||||
{
|
||||
int base = COMMON::Utils::GetBase<T>();
|
||||
#pragma omp parallel for
|
||||
for (int i = 0; i < GetNumSamples(); i++) {
|
||||
for (SizeType i = 0; i < GetNumSamples(); i++) {
|
||||
COMMON::Utils::Normalize(m_pSamples[i], GetFeatureDim(), base);
|
||||
}
|
||||
}
|
||||
@ -145,7 +191,54 @@ namespace SPTAG
|
||||
|
||||
m_pTrees.BuildTrees<T>(this);
|
||||
m_pGraph.BuildGraph<T>(this);
|
||||
|
||||
return ErrorCode::Success;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
ErrorCode Index<T>::RefineIndex(const std::vector<std::ostream*>& p_indexStreams)
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(m_dataAddLock);
|
||||
std::shared_lock<std::shared_timed_mutex> sharedlock(m_deletedID.getLock());
|
||||
|
||||
SizeType newR = GetNumSamples();
|
||||
|
||||
std::vector<SizeType> indices;
|
||||
std::vector<SizeType> reverseIndices(newR);
|
||||
for (SizeType i = 0; i < newR; i++) {
|
||||
if (!m_deletedID.contains(i)) {
|
||||
indices.push_back(i);
|
||||
reverseIndices[i] = i;
|
||||
}
|
||||
else {
|
||||
while (m_deletedID.contains(newR - 1) && newR > i) newR--;
|
||||
if (newR == i) break;
|
||||
indices.push_back(newR - 1);
|
||||
reverseIndices[newR - 1] = i;
|
||||
newR--;
|
||||
}
|
||||
}
|
||||
|
||||
std::cout << "Refine... from " << GetNumSamples() << "->" << newR << std::endl;
|
||||
|
||||
if (false == m_pSamples.Refine(indices, *p_indexStreams[0])) return ErrorCode::Fail;
|
||||
if (nullptr != m_pMetadata && (p_indexStreams.size() < 6 || ErrorCode::Success != m_pMetadata->RefineMetadata(indices, *p_indexStreams[4], *p_indexStreams[5]))) return ErrorCode::Fail;
|
||||
|
||||
m_pGraph.RefineGraph<T>(this, indices, reverseIndices, *p_indexStreams[2]);
|
||||
|
||||
COMMON::KDTree newTrees(m_pTrees);
|
||||
newTrees.BuildTrees<T>(this, &indices);
|
||||
#pragma omp parallel for
|
||||
for (SizeType i = 0; i < newTrees.size(); i++) {
|
||||
if (newTrees[i].left < 0)
|
||||
newTrees[i].left = -reverseIndices[-newTrees[i].left - 1] - 1;
|
||||
if (newTrees[i].right < 0)
|
||||
newTrees[i].right = -reverseIndices[-newTrees[i].right - 1] - 1;
|
||||
}
|
||||
newTrees.SaveTrees(*p_indexStreams[1]);
|
||||
|
||||
Helper::Concurrent::ConcurrentSet<SizeType> newDeletedID;
|
||||
newDeletedID.save(*p_indexStreams[3]);
|
||||
return ErrorCode::Success;
|
||||
}
|
||||
|
||||
@ -163,56 +256,40 @@ namespace SPTAG
|
||||
mkdir(folderPath.c_str());
|
||||
}
|
||||
|
||||
std::lock_guard<std::mutex> lock(m_dataLock);
|
||||
int newR = GetNumSamples();
|
||||
|
||||
std::vector<int> indices;
|
||||
std::vector<int> reverseIndices(newR);
|
||||
for (int i = 0; i < newR; i++) {
|
||||
if (m_deletedID.find(i) == m_deletedID.end()) {
|
||||
indices.push_back(i);
|
||||
reverseIndices[i] = i;
|
||||
}
|
||||
else {
|
||||
while (m_deletedID.find(newR - 1) != m_deletedID.end() && newR > i) newR--;
|
||||
if (newR == i) break;
|
||||
indices.push_back(newR - 1);
|
||||
reverseIndices[newR - 1] = i;
|
||||
newR--;
|
||||
}
|
||||
std::vector<std::ostream*> streams;
|
||||
streams.push_back(new std::ofstream(folderPath + m_sDataPointsFilename, std::ios::binary));
|
||||
streams.push_back(new std::ofstream(folderPath + m_sKDTFilename, std::ios::binary));
|
||||
streams.push_back(new std::ofstream(folderPath + m_sGraphFilename, std::ios::binary));
|
||||
streams.push_back(new std::ofstream(folderPath + m_sDeleteDataPointsFilename, std::ios::binary));
|
||||
if (nullptr != m_pMetadata)
|
||||
{
|
||||
streams.push_back(new std::ofstream(folderPath + m_sMetadataFile, std::ios::binary));
|
||||
streams.push_back(new std::ofstream(folderPath + m_sMetadataIndexFile, std::ios::binary));
|
||||
}
|
||||
|
||||
std::cout << "Refine... from " << GetNumSamples() << "->" << newR << std::endl;
|
||||
for (size_t i = 0; i < streams.size(); i++)
|
||||
if (!(((std::ofstream*)streams[i])->is_open())) return ErrorCode::FailedCreateFile;
|
||||
|
||||
if (false == m_pSamples.Refine(indices, folderPath + m_sDataPointsFilename)) return ErrorCode::FailedCreateFile;
|
||||
if (nullptr != m_pMetadata && ErrorCode::Success != m_pMetadata->RefineMetadata(indices, folderPath)) return ErrorCode::FailedCreateFile;
|
||||
|
||||
m_pGraph.RefineGraph<T>(this, indices, reverseIndices, folderPath + m_sGraphFilename);
|
||||
|
||||
COMMON::KDTree newTrees(m_pTrees);
|
||||
newTrees.BuildTrees<T>(this, &indices);
|
||||
#pragma omp parallel for
|
||||
for (int i = 0; i < newTrees.size(); i++) {
|
||||
if (newTrees[i].left < 0)
|
||||
newTrees[i].left = -reverseIndices[-newTrees[i].left - 1] - 1;
|
||||
if (newTrees[i].right < 0)
|
||||
newTrees[i].right = -reverseIndices[-newTrees[i].right - 1] - 1;
|
||||
ErrorCode ret = RefineIndex(streams);
|
||||
|
||||
for (size_t i = 0; i < streams.size(); i++)
|
||||
{
|
||||
((std::ofstream*)streams[i])->close();
|
||||
delete streams[i];
|
||||
}
|
||||
newTrees.SaveTrees(folderPath + m_sKDTFilename);
|
||||
return ErrorCode::Success;
|
||||
return ret;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
ErrorCode Index<T>::DeleteIndex(const void* p_vectors, int p_vectorNum) {
|
||||
ErrorCode Index<T>::DeleteIndex(const void* p_vectors, SizeType p_vectorNum) {
|
||||
const T* ptr_v = (const T*)p_vectors;
|
||||
#pragma omp parallel for schedule(dynamic)
|
||||
for (int i = 0; i < p_vectorNum; i++) {
|
||||
for (SizeType i = 0; i < p_vectorNum; i++) {
|
||||
COMMON::QueryResultSet<T> query(ptr_v + i * GetFeatureDim(), m_pGraph.m_iCEF);
|
||||
SearchIndex(query);
|
||||
|
||||
for (int i = 0; i < m_pGraph.m_iCEF; i++) {
|
||||
if (query.GetResult(i)->Dist < 1e-6) {
|
||||
std::lock_guard<std::mutex> lock(m_dataLock);
|
||||
m_deletedID.insert(query.GetResult(i)->VID);
|
||||
}
|
||||
}
|
||||
@ -221,40 +298,43 @@ namespace SPTAG
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
ErrorCode Index<T>::AddIndex(const void* p_vectors, int p_vectorNum, int p_dimension)
|
||||
ErrorCode Index<T>::DeleteIndex(const SizeType& p_id) {
|
||||
m_deletedID.insert(p_id);
|
||||
return ErrorCode::Success;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
ErrorCode Index<T>::AddIndex(const void* p_vectors, SizeType p_vectorNum, DimensionType p_dimension, SizeType* p_start)
|
||||
{
|
||||
int begin, end;
|
||||
SizeType begin, end;
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(m_dataLock);
|
||||
|
||||
if (GetNumSamples() == 0)
|
||||
return BuildIndex(p_vectors, p_vectorNum, p_dimension);
|
||||
|
||||
if (p_dimension != GetFeatureDim())
|
||||
return ErrorCode::FailedParseValue;
|
||||
std::lock_guard<std::mutex> lock(m_dataAddLock);
|
||||
|
||||
begin = GetNumSamples();
|
||||
end = GetNumSamples() + p_vectorNum;
|
||||
|
||||
m_pSamples.AddBatch((const T*)p_vectors, p_vectorNum);
|
||||
m_pGraph.AddBatch(p_vectorNum);
|
||||
if (p_start != nullptr) *p_start = begin;
|
||||
|
||||
if (m_pSamples.R() != end || m_pGraph.R() != end) {
|
||||
if (begin == 0) return BuildIndex(p_vectors, p_vectorNum, p_dimension);
|
||||
|
||||
if (p_dimension != GetFeatureDim()) return ErrorCode::FailedParseValue;
|
||||
|
||||
if (m_pSamples.AddBatch((const T*)p_vectors, p_vectorNum) != ErrorCode::Success || m_pGraph.AddBatch(p_vectorNum) != ErrorCode::Success) {
|
||||
std::cout << "Memory Error: Cannot alloc space for vectors" << std::endl;
|
||||
m_pSamples.SetR(begin);
|
||||
m_pGraph.SetR(begin);
|
||||
return ErrorCode::Fail;
|
||||
return ErrorCode::MemoryOverFlow;
|
||||
}
|
||||
if (DistCalcMethod::Cosine == m_iDistCalcMethod)
|
||||
{
|
||||
int base = COMMON::Utils::GetBase<T>();
|
||||
for (int i = begin; i < end; i++) {
|
||||
for (SizeType i = begin; i < end; i++) {
|
||||
COMMON::Utils::Normalize((T*)m_pSamples[i], GetFeatureDim(), base);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (int node = begin; node < end; node++)
|
||||
for (SizeType node = begin; node < end; node++)
|
||||
{
|
||||
m_pGraph.RefineNode<T>(this, node, true);
|
||||
}
|
||||
@ -262,47 +342,6 @@ namespace SPTAG
|
||||
return ErrorCode::Success;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
ErrorCode
|
||||
Index<T>::SaveIndexToMemory(std::vector<void*>& p_indexBlobs, std::vector<int64_t> &p_indexBlobsLen)
|
||||
{
|
||||
p_indexBlobs.resize(4);
|
||||
p_indexBlobsLen.resize(4);
|
||||
if (!m_pSamples.Save(&p_indexBlobs[0], p_indexBlobsLen[0])) return ErrorCode::Fail;
|
||||
if (!m_pTrees.SaveTrees(&p_indexBlobs[1], p_indexBlobsLen[1])) return ErrorCode::Fail;
|
||||
if (!m_pGraph.SaveGraphToMemory(&p_indexBlobs[2], p_indexBlobsLen[2])) return ErrorCode::Fail;
|
||||
if (ErrorCode::Success != m_pMetadata->SaveMetadataToMemory(&p_indexBlobs[3], p_indexBlobsLen[3]))
|
||||
return ErrorCode::Fail;
|
||||
return ErrorCode::Success;
|
||||
}
|
||||
|
||||
// Writes the KDT configuration to p_configout and the index data files under p_folderPath.
//
// p_folderPath  prefix prepended verbatim to every data file name.
// p_configout   receives one "name=value" line per parameter emitted through
//               DefineKDTParameter, followed by an empty line.
//
// Returns the status of RefineIndex when deleted IDs are pending (previously that status
// was discarded and Success was reported unconditionally), ErrorCode::Fail when one of the
// plain saves fails, and ErrorCode::Success otherwise.
template<typename T>
ErrorCode
Index<T>::SaveIndex(const std::string& p_folderPath, std::ofstream& p_configout)
{
    // Saving always resets the data file names to their fixed values.
    m_sDataPointsFilename = "vectors.bin";
    m_sKDTFilename = "tree.bin";
    m_sGraphFilename = "graph.bin";

#define DefineKDTParameter(VarName, VarType, DefaultValue, RepresentStr) \
    p_configout << RepresentStr << "=" << GetParameter(RepresentStr) << std::endl;

#include "inc/Core/KDT/ParameterDefinitionList.h"
#undef DefineKDTParameter

    p_configout << std::endl;

    if (m_deletedID.size() > 0) {
        // Deleted vectors are pending: the refine pass writes the files, so its
        // outcome is the outcome of the save.
        return RefineIndex(p_folderPath);
    }

    if (!m_pSamples.Save(p_folderPath + m_sDataPointsFilename)) return ErrorCode::Fail;
    if (!m_pTrees.SaveTrees(p_folderPath + m_sKDTFilename)) return ErrorCode::Fail;
    if (!m_pGraph.SaveGraph(p_folderPath + m_sGraphFilename)) return ErrorCode::Fail;
    return ErrorCode::Success;
}
|
||||
|
||||
template <typename T>
|
||||
ErrorCode
|
||||
Index<T>::SetParameter(const char* p_param, const char* p_value)
|
||||
|
@ -5,68 +5,43 @@
|
||||
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <cstring>
|
||||
|
||||
using namespace SPTAG;
|
||||
|
||||
ErrorCode
|
||||
MetadataSet::RefineMetadata(std::vector<int>& indices, const std::string& p_folderPath)
|
||||
MetadataSet::RefineMetadata(std::vector<SizeType>& indices, std::ostream& p_metaOut, std::ostream& p_metaIndexOut)
|
||||
{
|
||||
std::ofstream metaOut(p_folderPath + "metadata.bin_tmp", std::ios::binary);
|
||||
std::ofstream metaIndexOut(p_folderPath + "metadataIndex.bin", std::ios::binary);
|
||||
if (!metaOut.is_open() || !metaIndexOut.is_open()) return ErrorCode::FailedCreateFile;
|
||||
|
||||
int R = (int)indices.size();
|
||||
metaIndexOut.write((char*)&R, sizeof(int));
|
||||
SizeType R = (SizeType)indices.size();
|
||||
p_metaIndexOut.write((char*)&R, sizeof(SizeType));
|
||||
std::uint64_t offset = 0;
|
||||
for (int i = 0; i < R; i++) {
|
||||
metaIndexOut.write((char*)&offset, sizeof(std::uint64_t));
|
||||
for (SizeType i = 0; i < R; i++) {
|
||||
p_metaIndexOut.write((char*)&offset, sizeof(std::uint64_t));
|
||||
ByteArray meta = GetMetadata(indices[i]);
|
||||
metaOut.write((char*)meta.Data(), sizeof(uint8_t)*meta.Length());
|
||||
p_metaOut.write((char*)meta.Data(), sizeof(uint8_t)*meta.Length());
|
||||
offset += meta.Length();
|
||||
}
|
||||
p_metaIndexOut.write((char*)&offset, sizeof(std::uint64_t));
|
||||
return ErrorCode::Success;
|
||||
}
|
||||
|
||||
|
||||
ErrorCode
|
||||
MetadataSet::RefineMetadata(std::vector<SizeType>& indices, const std::string& p_metaFile, const std::string& p_metaindexFile)
|
||||
{
|
||||
std::ofstream metaOut(p_metaFile + "_tmp", std::ios::binary);
|
||||
std::ofstream metaIndexOut(p_metaindexFile, std::ios::binary);
|
||||
if (!metaOut.is_open() || !metaIndexOut.is_open()) return ErrorCode::FailedCreateFile;
|
||||
|
||||
RefineMetadata(indices, metaOut, metaIndexOut);
|
||||
metaOut.close();
|
||||
metaIndexOut.write((char*)&offset, sizeof(std::uint64_t));
|
||||
metaIndexOut.close();
|
||||
|
||||
SPTAG::MetadataSet::MetaCopy(p_folderPath + "metadata.bin_tmp", p_folderPath + "metadata.bin");
|
||||
if (fileexists(p_metaFile.c_str())) std::remove(p_metaFile.c_str());
|
||||
std::rename((p_metaFile + "_tmp").c_str(), p_metaFile.c_str());
|
||||
return ErrorCode::Success;
|
||||
}
|
||||
|
||||
|
||||
// Copies the file p_src to p_dst byte for byte.
//
// Returns ErrorCode::Success when the paths are equal (nothing is copied) or the copy
// completes, ErrorCode::FailedOpenFile when p_src cannot be opened,
// ErrorCode::FailedCreateFile when p_dst cannot be created, and ErrorCode::Fail when
// reading or writing breaks part-way (previously such failures were reported as Success,
// and a read error made the eof-driven loop spin forever).
ErrorCode
MetadataSet::MetaCopy(const std::string& p_src, const std::string& p_dst)
{
    if (p_src == p_dst) return ErrorCode::Success;

    std::ifstream src(p_src, std::ios::binary);
    if (!src.is_open())
    {
        std::cerr << "ERROR: Can't open " << p_src << std::endl;
        return ErrorCode::FailedOpenFile;
    }

    std::ofstream dst(p_dst, std::ios::binary);
    if (!dst.is_open())
    {
        std::cerr << "ERROR: Can't create " << p_dst << std::endl;
        return ErrorCode::FailedCreateFile;
    }

    // The vector owns the copy buffer, so it is released on every exit path.
    std::vector<char> buf(1000000);
    while (src.good() && dst.good()) {
        src.read(buf.data(), static_cast<std::streamsize>(buf.size()));
        // A short final read sets eofbit/failbit; gcount() still tells how much arrived.
        dst.write(buf.data(), src.gcount());
    }
    dst.flush();

    // Stopping because of EOF is the normal case; anything else is an I/O failure.
    if (src.bad() || !dst) return ErrorCode::Fail;

    // Both streams are closed by their destructors.
    return ErrorCode::Success;
}
|
||||
|
||||
// The constructor has no work of its own to do.
MetadataSet::MetadataSet() = default;
|
||||
@ -107,19 +82,19 @@ FileMetadataSet::~FileMetadataSet()
|
||||
|
||||
|
||||
ByteArray
|
||||
FileMetadataSet::GetMetadata(IndexType p_vectorID) const
|
||||
FileMetadataSet::GetMetadata(SizeType p_vectorID) const
|
||||
{
|
||||
std::uint64_t startoff = m_pOffsets[p_vectorID];
|
||||
std::uint64_t bytes = m_pOffsets[p_vectorID + 1] - startoff;
|
||||
if (p_vectorID < (IndexType)m_count) {
|
||||
if (p_vectorID < m_count) {
|
||||
m_fp->seekg(startoff, std::ios_base::beg);
|
||||
ByteArray b = ByteArray::Alloc((SizeType)bytes);
|
||||
ByteArray b = ByteArray::Alloc(bytes);
|
||||
m_fp->read((char*)b.Data(), bytes);
|
||||
return b;
|
||||
}
|
||||
else {
|
||||
startoff -= m_pOffsets[m_count];
|
||||
return ByteArray((std::uint8_t*)m_newdata.data() + startoff, static_cast<SizeType>(bytes), false);
|
||||
return ByteArray((std::uint8_t*)m_newdata.data() + startoff, bytes, false);
|
||||
}
|
||||
}
|
||||
|
||||
@ -138,10 +113,18 @@ FileMetadataSet::Available() const
|
||||
}
|
||||
|
||||
|
||||
std::pair<std::uint64_t, std::uint64_t>
|
||||
FileMetadataSet::BufferSize() const
|
||||
{
|
||||
return std::make_pair(m_pOffsets[m_pOffsets.size() - 1],
|
||||
sizeof(SizeType) + sizeof(std::uint64_t) * m_pOffsets.size());
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
FileMetadataSet::AddBatch(MetadataSet& data)
|
||||
{
|
||||
for (int i = 0; i < static_cast<int>(data.Count()); i++)
|
||||
for (SizeType i = 0; i < data.Count(); i++)
|
||||
{
|
||||
ByteArray newdata = data.GetMetadata(i);
|
||||
m_newdata.insert(m_newdata.end(), newdata.Data(), newdata.Data() + newdata.Length());
|
||||
@ -150,45 +133,52 @@ FileMetadataSet::AddBatch(MetadataSet& data)
|
||||
}
|
||||
|
||||
|
||||
|
||||
// Streams the metadata to p_metaOut and its index to p_metaIndexOut.
//
// p_metaOut       receives the full contents of the backing file (m_fp) followed by any
//                 bytes accumulated in m_newdata.
// p_metaIndexOut  receives Count() as a SizeType followed by the whole offset table.
//
// Always returns ErrorCode::Success.
//
// Fixes over the previous version:
//  * the stream state of m_fp is cleared before rewinding and after the copy; reading up
//    to EOF sets failbit, which would make every later seekg/read on m_fp fail;
//  * the copy loop stops on any stream failure instead of spinning while !eof();
//  * the copy buffer is owned by a vector rather than raw new[]/delete[].
ErrorCode
FileMetadataSet::SaveMetadata(std::ostream& p_metaOut, std::ostream& p_metaIndexOut)
{
    m_fp->clear();
    m_fp->seekg(0, std::ios_base::beg);

    std::vector<char> buf(1000000);
    while (m_fp->good()) {
        m_fp->read(buf.data(), static_cast<std::streamsize>(buf.size()));
        // The final short read still delivers gcount() bytes before the loop ends.
        p_metaOut.write(buf.data(), m_fp->gcount());
    }
    // Leave the backing file usable for subsequent reads.
    m_fp->clear();

    if (m_newdata.size() > 0) {
        p_metaOut.write((char*)m_newdata.data(), m_newdata.size());
    }

    SizeType count = Count();
    p_metaIndexOut.write((char*)&count, sizeof(SizeType));
    p_metaIndexOut.write((char*)m_pOffsets.data(), sizeof(std::uint64_t) * m_pOffsets.size());
    return ErrorCode::Success;
}
|
||||
|
||||
|
||||
ErrorCode
|
||||
FileMetadataSet::SaveMetadata(const std::string& p_metaFile, const std::string& p_metaindexFile)
|
||||
{
|
||||
ErrorCode ret = ErrorCode::Success;
|
||||
std::ofstream metaOut(p_metaFile + "_tmp", std::ios::binary);
|
||||
std::ofstream metaIndexOut(p_metaindexFile, std::ios::binary);
|
||||
if (!metaOut.is_open() || !metaIndexOut.is_open()) return ErrorCode::FailedCreateFile;
|
||||
|
||||
SaveMetadata(metaOut, metaIndexOut);
|
||||
metaOut.close();
|
||||
metaIndexOut.close();
|
||||
|
||||
m_fp->close();
|
||||
ret = MetaCopy(m_metaFile, p_metaFile);
|
||||
if (ErrorCode::Success != ret)
|
||||
{
|
||||
return ret;
|
||||
}
|
||||
if (m_newdata.size() > 0) {
|
||||
std::ofstream tmpout(p_metaFile, std::ofstream::app|std::ios::binary);
|
||||
if (!tmpout.is_open()) return ErrorCode::FailedOpenFile;
|
||||
tmpout.write((char*)m_newdata.data(), m_newdata.size());
|
||||
tmpout.close();
|
||||
}
|
||||
if (fileexists(p_metaFile.c_str())) std::remove(p_metaFile.c_str());
|
||||
std::rename((p_metaFile + "_tmp").c_str(), p_metaFile.c_str());
|
||||
m_fp->open(p_metaFile, std::ifstream::binary);
|
||||
|
||||
std::ofstream dst(p_metaindexFile, std::ios::binary);
|
||||
m_count = static_cast<int>(m_pOffsets.size()) - 1;
|
||||
m_count = Count();
|
||||
m_newdata.clear();
|
||||
dst.write((char*)&m_count, sizeof(m_count));
|
||||
dst.write((char*)m_pOffsets.data(), sizeof(std::uint64_t) * m_pOffsets.size());
|
||||
return ret;
|
||||
return ErrorCode::Success;
|
||||
}
|
||||
|
||||
|
||||
// Not implemented for file-backed metadata: the outputs are left untouched and
// ErrorCode::Fail is returned unconditionally.
// TODO(lxj): serialize file to mem?
ErrorCode FileMetadataSet::SaveMetadataToMemory(void** /*pGraphMemFile*/, int64_t& /*len*/)
{
    return ErrorCode::Fail;
}
|
||||
|
||||
// Not supported for file-backed metadata: the input is ignored and ErrorCode::Fail is
// returned unconditionally.
// TODO(lxj): not support yet
ErrorCode FileMetadataSet::LoadMetadataFromMemory(void* /*pGraphMemFile*/)
{
    return ErrorCode::Fail;
}
|
||||
|
||||
MemMetadataSet::MemMetadataSet(ByteArray p_metadata, ByteArray p_offsets, SizeType p_count)
|
||||
: m_metadataHolder(std::move(p_metadata)),
|
||||
m_offsetHolder(std::move(p_offsets)),
|
||||
@ -205,17 +195,17 @@ MemMetadataSet::~MemMetadataSet()
|
||||
|
||||
|
||||
ByteArray
|
||||
MemMetadataSet::GetMetadata(IndexType p_vectorID) const
|
||||
MemMetadataSet::GetMetadata(SizeType p_vectorID) const
|
||||
{
|
||||
if (static_cast<SizeType>(p_vectorID) < m_count)
|
||||
if (p_vectorID < m_count)
|
||||
{
|
||||
return ByteArray(m_metadataHolder.Data() + m_offsets[p_vectorID],
|
||||
static_cast<SizeType>(m_offsets[p_vectorID + 1] - m_offsets[p_vectorID]),
|
||||
m_metadataHolder.DataHolder());
|
||||
m_offsets[p_vectorID + 1] - m_offsets[p_vectorID],
|
||||
false);
|
||||
}
|
||||
else if (p_vectorID < m_offsets.size() - 1) {
|
||||
else if (p_vectorID < (SizeType)(m_offsets.size() - 1)) {
|
||||
return ByteArray((std::uint8_t*)m_newdata.data() + m_offsets[p_vectorID] - m_offsets[m_count],
|
||||
static_cast<SizeType>(m_offsets[p_vectorID + 1] - m_offsets[p_vectorID]),
|
||||
m_offsets[p_vectorID + 1] - m_offsets[p_vectorID],
|
||||
false);
|
||||
}
|
||||
|
||||
@ -226,7 +216,7 @@ MemMetadataSet::GetMetadata(IndexType p_vectorID) const
|
||||
SizeType
|
||||
MemMetadataSet::Count() const
|
||||
{
|
||||
return m_count;
|
||||
return static_cast<SizeType>(m_offsets.size() - 1);
|
||||
}
|
||||
|
||||
|
||||
@ -236,10 +226,18 @@ MemMetadataSet::Available() const
|
||||
return m_metadataHolder.Length() > 0 && m_offsetHolder.Length() > 0;
|
||||
}
|
||||
|
||||
|
||||
std::pair<std::uint64_t, std::uint64_t>
|
||||
MemMetadataSet::BufferSize() const
|
||||
{
|
||||
return std::make_pair(m_offsets[m_offsets.size() - 1],
|
||||
sizeof(SizeType) + sizeof(std::uint64_t) * m_offsets.size());
|
||||
}
|
||||
|
||||
void
|
||||
MemMetadataSet::AddBatch(MetadataSet& data)
|
||||
{
|
||||
for (int i = 0; i < static_cast<int>(data.Count()); i++)
|
||||
for (SizeType i = 0; i < data.Count(); i++)
|
||||
{
|
||||
ByteArray newdata = data.GetMetadata(i);
|
||||
m_newdata.insert(m_newdata.end(), newdata.Data(), newdata.Data() + newdata.Length());
|
||||
@ -247,83 +245,36 @@ MemMetadataSet::AddBatch(MetadataSet& data)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Streams the metadata to p_metaOut and its index to p_metaIndexOut.
//
// p_metaOut       receives the bytes of m_metadataHolder followed by any bytes
//                 accumulated in m_newdata.
// p_metaIndexOut  receives Count() as a SizeType followed by the whole offset table.
//
// Always returns ErrorCode::Success; stream errors are not inspected.
ErrorCode MemMetadataSet::SaveMetadata(std::ostream& p_metaOut, std::ostream& p_metaIndexOut)
{
    const char* heldBytes = reinterpret_cast<const char*>(m_metadataHolder.Data());
    p_metaOut.write(heldBytes, m_metadataHolder.Length());

    if (!m_newdata.empty())
    {
        p_metaOut.write((char*)m_newdata.data(), m_newdata.size());
    }

    SizeType entryCount = Count();
    p_metaIndexOut.write((char*)&entryCount, sizeof(SizeType));

    const auto tableBytes = sizeof(std::uint64_t) * m_offsets.size();
    p_metaIndexOut.write((char*)m_offsets.data(), tableBytes);

    return ErrorCode::Success;
}
|
||||
|
||||
|
||||
|
||||
ErrorCode
|
||||
MemMetadataSet::SaveMetadata(const std::string& p_metaFile, const std::string& p_metaindexFile)
|
||||
{
|
||||
std::ofstream outputStream;
|
||||
outputStream.open(p_metaFile, std::ios::binary);
|
||||
if (!outputStream.is_open())
|
||||
{
|
||||
std::cerr << "Error: Failed to create file " << p_metaFile << "." << std::endl;
|
||||
return ErrorCode::FailedCreateFile;
|
||||
}
|
||||
std::ofstream metaOut(p_metaFile + "_tmp", std::ios::binary);
|
||||
std::ofstream metaIndexOut(p_metaindexFile, std::ios::binary);
|
||||
if (!metaOut.is_open() || !metaIndexOut.is_open()) return ErrorCode::FailedCreateFile;
|
||||
|
||||
outputStream.write(reinterpret_cast<const char*>(m_metadataHolder.Data()), m_metadataHolder.Length());
|
||||
outputStream.write((const char*)m_newdata.data(), sizeof(std::uint8_t)*m_newdata.size());
|
||||
outputStream.close();
|
||||
|
||||
outputStream.open(p_metaindexFile, std::ios::binary);
|
||||
if (!outputStream.is_open())
|
||||
{
|
||||
std::cerr << "Error: Failed to create file " << p_metaindexFile << "." << std::endl;
|
||||
return ErrorCode::FailedCreateFile;
|
||||
}
|
||||
|
||||
m_count = static_cast<int>(m_offsets.size()) - 1;
|
||||
outputStream.write(reinterpret_cast<const char*>(&m_count), sizeof(m_count));
|
||||
outputStream.write(reinterpret_cast<const char*>(m_offsets.data()), sizeof(std::uint64_t)*m_offsets.size());
|
||||
outputStream.close();
|
||||
SaveMetadata(metaOut, metaIndexOut);
|
||||
metaOut.close();
|
||||
metaIndexOut.close();
|
||||
|
||||
if (fileexists(p_metaFile.c_str())) std::remove(p_metaFile.c_str());
|
||||
std::rename((p_metaFile + "_tmp").c_str(), p_metaFile.c_str());
|
||||
return ErrorCode::Success;
|
||||
}
|
||||
|
||||
// Serialises the metadata set into one malloc'ed block.
//
// Layout of the block:
//   int64  number of metadata bytes that follow
//   int64  number of entries (offset table size minus one)
//   bytes  metadata: contents of m_metadataHolder, then contents of m_newdata
//   uint64 offset table (all of m_offsets)
//
// pGraphMemFile  receives the block; it was obtained with malloc.
// len            receives the block size in bytes.
//
// Returns ErrorCode::Fail when the allocation fails, ErrorCode::Success otherwise.
//
// Fixes over the previous version:
//  * bytes appended through m_newdata are now included; they used to be dropped even
//    though the offset table written next to them already covered those entries;
//  * m_count is no longer overwritten, since this function does not merge m_newdata into
//    m_metadataHolder and raising m_count would make lookups read past the holder.
ErrorCode
MemMetadataSet::SaveMetadataToMemory(void **pGraphMemFile, int64_t &len) {
    const std::size_t heldBytes = m_metadataHolder.Length();
    const std::size_t newBytes = sizeof(std::uint8_t) * m_newdata.size();
    const std::size_t tableBytes = sizeof(std::uint64_t) * m_offsets.size();

    const std::size_t size = sizeof(int64_t) + sizeof(int64_t) + heldBytes + newBytes + tableBytes;
    char* mem = (char*)malloc(size);
    if (mem == NULL) return ErrorCode::Fail;

    char* ptr = mem;
    *(int64_t*)ptr = static_cast<int64_t>(heldBytes + newBytes);
    ptr += sizeof(int64_t);

    *(int64_t*)ptr = static_cast<int64_t>(m_offsets.size()) - 1;
    ptr += sizeof(int64_t);

    // Guard the copies: a zero-length source may come with a null data pointer.
    if (heldBytes > 0) {
        memcpy(ptr, m_metadataHolder.Data(), heldBytes);
        ptr += heldBytes;
    }
    if (newBytes > 0) {
        memcpy(ptr, m_newdata.data(), newBytes);
        ptr += newBytes;
    }
    if (tableBytes > 0) {
        memcpy(ptr, m_offsets.data(), tableBytes);
    }

    *pGraphMemFile = mem;
    len = static_cast<int64_t>(size);

    return ErrorCode::Success;
}
|
||||
|
||||
// Rebuilds the metadata set from a buffer laid out as:
//   int64 payload length, int64 record count, payload bytes, then
//   (record count + 1) uint64 offsets.
//
// Returns Fail when the buffer pointer is null or the header holds a negative
// payload length or a record count below -1; Success otherwise.
// NOTE(review): the buffer's total size is not passed in, so the reads cannot
// be bounds-checked here -- callers must supply a complete buffer.
ErrorCode
MemMetadataSet::LoadMetadataFromMemory(void *pGraphMemFile) {
    if (pGraphMemFile == nullptr) return ErrorCode::Fail;

    m_metadataHolder.Clear();
    m_offsetHolder.Clear();
    m_offsets.clear();

    const char* ptr = static_cast<const char*>(pGraphMemFile);

    // Read the header through memcpy so an unaligned buffer is handled safely.
    int64_t metadataHolderLength = 0;
    memcpy(&metadataHolderLength, ptr, sizeof(int64_t));
    ptr += sizeof(int64_t);

    int64_t recordCount = 0;
    memcpy(&recordCount, ptr, sizeof(int64_t));
    ptr += sizeof(int64_t);

    // A count of -1 is what an empty offset table serializes to.
    if (metadataHolderLength < 0 || recordCount < -1) return ErrorCode::Fail;
    m_count = recordCount;

    m_metadataHolder = ByteArray::Alloc(metadataHolderLength);
    if (metadataHolderLength > 0) memcpy(m_metadataHolder.Data(), ptr, metadataHolderLength);
    ptr += metadataHolderLength;

    const size_t offsetCount = static_cast<size_t>(m_count + 1);
    const size_t offsetBytes = sizeof(std::uint64_t) * offsetCount;
    m_offsetHolder = ByteArray::Alloc(offsetBytes);
    if (offsetBytes > 0) memcpy(m_offsetHolder.Data(), ptr, offsetBytes);

    const std::uint64_t* loaded = reinterpret_cast<const std::uint64_t*>(m_offsetHolder.Data());
    m_offsets.insert(m_offsets.end(), loaded, loaded + offsetCount);

    return ErrorCode::Success;
}
|
||||
|
@ -6,6 +6,7 @@
|
||||
#include "inc/Helper/CommonHelper.h"
|
||||
#include "inc/Helper/StringConvert.h"
|
||||
#include "inc/Helper/SimpleIniReader.h"
|
||||
#include "inc/Helper/BufferStream.h"
|
||||
|
||||
#include "inc/Core/BKT/Index.h"
|
||||
#include "inc/Core/KDT/Index.h"
|
||||
@ -46,7 +47,7 @@ VectorIndex::SetMetadata(const std::string& p_metadataFilePath, const std::strin
|
||||
|
||||
|
||||
ByteArray
|
||||
VectorIndex::GetMetadata(IndexType p_vectorID) const {
|
||||
VectorIndex::GetMetadata(SizeType p_vectorID) const {
|
||||
if (nullptr != m_pMetadata)
|
||||
{
|
||||
return m_pMetadata->GetMetadata(p_vectorID);
|
||||
@ -55,6 +56,100 @@ VectorIndex::GetMetadata(IndexType p_vectorID) const {
|
||||
}
|
||||
|
||||
|
||||
std::shared_ptr<std::vector<std::uint64_t>> VectorIndex::CalculateBufferSize() const
|
||||
{
|
||||
std::shared_ptr<std::vector<std::uint64_t>> ret = BufferSize();
|
||||
if (m_pMetadata != nullptr)
|
||||
{
|
||||
auto metasize = m_pMetadata->BufferSize();
|
||||
ret->push_back(metasize.first);
|
||||
ret->push_back(metasize.second);
|
||||
}
|
||||
return std::move(ret);
|
||||
}
|
||||
|
||||
|
||||
// Reads the settings shared by all index types from an already parsed ini:
// the metadata file names (when a [MetaData] section exists) and the
// mandatory DistCalcMethod entry of [Index]; the remaining parameters are
// handed to LoadConfig(). Returns Fail when DistCalcMethod is missing.
ErrorCode
VectorIndex::LoadIndexConfig(Helper::IniReader& p_reader)
{
    const std::string section("MetaData");
    const bool hasMetadataSection = p_reader.DoesSectionExist(section);
    if (hasMetadataSection)
    {
        m_sMetadataFile = p_reader.GetParameter(section, "MetaDataFilePath", std::string());
        m_sMetadataIndexFile = p_reader.GetParameter(section, "MetaDataIndexPath", std::string());
    }

    const auto distMethod = p_reader.GetParameter("Index", "DistCalcMethod", DistCalcMethod::Undefined);
    if (distMethod != DistCalcMethod::Undefined)
    {
        return LoadConfig(p_reader);
    }

    std::cerr << "Error: Failed to load parameter DistCalcMethod." << std::endl;
    return ErrorCode::Fail;
}
|
||||
|
||||
|
||||
// Writes the loader configuration to p_configOut: an optional [MetaData]
// section (only when a metadata set is attached), then the [Index] header
// with algorithm and value type, and finally whatever SaveConfig() appends.
ErrorCode
VectorIndex::SaveIndexConfig(std::ostream& p_configOut)
{
    const bool hasMetadata = (m_pMetadata != nullptr);
    if (hasMetadata)
    {
        p_configOut << "[MetaData]" << std::endl
                    << "MetaDataFilePath=" << m_sMetadataFile << std::endl
                    << "MetaDataIndexPath=" << m_sMetadataIndexFile << std::endl;
        if (m_pMetaToVec != nullptr)
        {
            p_configOut << "MetaDataToVectorIndex=true" << std::endl;
        }
        p_configOut << std::endl;
    }

    p_configOut << "[Index]" << std::endl;
    p_configOut << "IndexAlgoType=" << Helper::Convert::ConvertToString(GetIndexAlgoType()) << std::endl;
    p_configOut << "ValueType=" << Helper::Convert::ConvertToString(GetVectorValueType()) << std::endl
                << std::endl;

    return SaveConfig(p_configOut);
}
|
||||
|
||||
|
||||
void
|
||||
VectorIndex::BuildMetaMapping()
|
||||
{
|
||||
m_pMetaToVec.reset(new std::unordered_map<std::string, SizeType>);
|
||||
for (SizeType i = 0; i < m_pMetadata->Count(); i++) {
|
||||
ByteArray meta = m_pMetadata->GetMetadata(i);
|
||||
m_pMetaToVec->emplace(std::string((char*)meta.Data(), meta.Length()), i);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Loads this index from an in-memory configuration string plus a list of
// binary blobs.
//
// p_config     ini-formatted text as produced by SaveIndexConfig().
// p_indexBlobs index data blobs; when the config has a [MetaData] section and
//              more than four blobs are supplied, the last two are read as
//              metadata payload and metadata index.
//
// Returns FailedParseValue when the config cannot be parsed, the error from
// LoadIndexConfig() when the shared settings are invalid, Fail when the
// metadata cannot be loaded, otherwise the result of LoadIndexDataFromMemory().
// NOTE(review): the blob-count threshold here is "> 4" while the in-memory
// SaveIndex() uses "> 5" -- confirm which one is intended.
ErrorCode
VectorIndex::LoadIndex(const std::string& p_config, const std::vector<ByteArray>& p_indexBlobs)
{
    SPTAG::Helper::IniReader p_reader;
    std::istringstream p_configin(p_config);
    if (SPTAG::ErrorCode::Success != p_reader.LoadIni(p_configin)) return ErrorCode::FailedParseValue;

    // A configuration that fails validation must not be loaded silently.
    const ErrorCode configResult = LoadIndexConfig(p_reader);
    if (ErrorCode::Success != configResult) return configResult;

    if (p_reader.DoesSectionExist("MetaData") && p_indexBlobs.size() > 4)
    {
        ByteArray pMetaIndex = p_indexBlobs[p_indexBlobs.size() - 1];

        // The index blob starts with a SizeType record count; reject blobs
        // too short to contain it before reading through the pointer.
        if (pMetaIndex.Data() == nullptr || pMetaIndex.Length() < sizeof(SizeType))
        {
            std::cerr << "Error: Failed to load metadata." << std::endl;
            return ErrorCode::Fail;
        }

        m_pMetadata.reset(new MemMetadataSet(p_indexBlobs[p_indexBlobs.size() - 2],
            ByteArray(pMetaIndex.Data() + sizeof(SizeType), pMetaIndex.Length() - sizeof(SizeType), false),
            *((SizeType*)pMetaIndex.Data())));

        if (!m_pMetadata->Available())
        {
            std::cerr << "Error: Failed to load metadata." << std::endl;
            return ErrorCode::Fail;
        }

        if (p_reader.GetParameter("MetaData", "MetaDataToVectorIndex", std::string()) == "true")
        {
            BuildMetaMapping();
        }
    }
    return LoadIndexDataFromMemory(p_indexBlobs);
}
|
||||
|
||||
|
||||
ErrorCode
|
||||
VectorIndex::LoadIndex(const std::string& p_folderPath)
|
||||
{
|
||||
@ -65,40 +160,64 @@ VectorIndex::LoadIndex(const std::string& p_folderPath)
|
||||
}
|
||||
|
||||
Helper::IniReader p_configReader;
|
||||
if (ErrorCode::Success != p_configReader.LoadIniFile(folderPath + "/indexloader.ini"))
|
||||
if (ErrorCode::Success != p_configReader.LoadIniFile(folderPath + "/indexloader.ini")) return ErrorCode::FailedOpenFile;
|
||||
LoadIndexConfig(p_configReader);
|
||||
|
||||
if (p_configReader.DoesSectionExist("MetaData"))
|
||||
{
|
||||
return ErrorCode::FailedOpenFile;
|
||||
}
|
||||
|
||||
std::string metadataSection("MetaData");
|
||||
if (p_configReader.DoesSectionExist(metadataSection))
|
||||
{
|
||||
std::string metadataFilePath = p_configReader.GetParameter(metadataSection,
|
||||
"MetaDataFilePath",
|
||||
std::string());
|
||||
std::string metadataIndexFilePath = p_configReader.GetParameter(metadataSection,
|
||||
"MetaDataIndexPath",
|
||||
std::string());
|
||||
|
||||
m_pMetadata.reset(new FileMetadataSet(folderPath + metadataFilePath, folderPath + metadataIndexFilePath));
|
||||
m_pMetadata.reset(new FileMetadataSet(folderPath + m_sMetadataFile, folderPath + m_sMetadataIndexFile));
|
||||
|
||||
if (!m_pMetadata->Available())
|
||||
{
|
||||
std::cerr << "Error: Failed to load metadata." << std::endl;
|
||||
return ErrorCode::Fail;
|
||||
}
|
||||
}
|
||||
if (DistCalcMethod::Undefined == p_configReader.GetParameter("Index", "DistCalcMethod", DistCalcMethod::Undefined))
|
||||
{
|
||||
std::cerr << "Error: Failed to load parameter DistCalcMethod." << std::endl;
|
||||
return ErrorCode::Fail;
|
||||
}
|
||||
|
||||
return LoadIndex(folderPath, p_configReader);
|
||||
if (p_configReader.GetParameter("MetaData", "MetaDataToVectorIndex", std::string()) == "true")
|
||||
{
|
||||
BuildMetaMapping();
|
||||
}
|
||||
}
|
||||
return LoadIndexData(folderPath);
|
||||
}
|
||||
|
||||
|
||||
ErrorCode VectorIndex::SaveIndex(const std::string& p_folderPath)
|
||||
// Saves this index into caller-provided memory.
//
// p_config     receives the ini text written by SaveIndexConfig().
// p_indexBlobs pre-allocated buffers; one output stream is created over each
//              and passed to RefineIndex() or SaveIndexData().
//
// Returns the first error from SaveIndexConfig(), the metadata save, or the
// index save/refine step; Success otherwise.
// NOTE(review): metadata is only written when more than five blobs are
// supplied, while the in-memory LoadIndex() reads it when more than four are
// present -- confirm which threshold is intended.
ErrorCode
VectorIndex::SaveIndex(std::string& p_config, const std::vector<ByteArray>& p_indexBlobs)
{
    std::ostringstream p_configStream;
    ErrorCode ret = SaveIndexConfig(p_configStream);
    if (ErrorCode::Success != ret) return ret;
    p_config = p_configStream.str();

    // The streams are owned here so that they are released on every exit
    // path; the raw-pointer vector is only the view handed to the savers.
    std::vector<std::unique_ptr<std::ostream>> ownedStreams;
    std::vector<std::ostream*> p_indexStreams;
    ownedStreams.reserve(p_indexBlobs.size());
    p_indexStreams.reserve(p_indexBlobs.size());
    for (size_t i = 0; i < p_indexBlobs.size(); i++)
    {
        ownedStreams.emplace_back(new Helper::obufferstream(new Helper::streambuf((char*)p_indexBlobs[i].Data(), p_indexBlobs[i].Length()), true));
        p_indexStreams.push_back(ownedStreams.back().get());
    }

    if (NeedRefine())
    {
        ret = RefineIndex(p_indexStreams);
    }
    else
    {
        if (m_pMetadata != nullptr && p_indexStreams.size() > 5)
        {
            ret = m_pMetadata->SaveMetadata(*p_indexStreams[p_indexStreams.size() - 2], *p_indexStreams[p_indexStreams.size() - 1]);
        }
        if (ErrorCode::Success == ret) ret = SaveIndexData(p_indexStreams);
    }
    return ret;
}
|
||||
|
||||
|
||||
ErrorCode
|
||||
VectorIndex::SaveIndex(const std::string& p_folderPath)
|
||||
{
|
||||
std::string folderPath(p_folderPath);
|
||||
if (!folderPath.empty() && *(folderPath.rbegin()) != FolderSep)
|
||||
@ -111,39 +230,24 @@ ErrorCode VectorIndex::SaveIndex(const std::string& p_folderPath)
|
||||
mkdir(folderPath.c_str());
|
||||
}
|
||||
|
||||
std::string loaderFilePath = folderPath + "indexloader.ini";
|
||||
std::ofstream configFile(folderPath + "indexloader.ini");
|
||||
if (!configFile.is_open()) return ErrorCode::FailedCreateFile;
|
||||
SaveIndexConfig(configFile);
|
||||
configFile.close();
|
||||
|
||||
if (NeedRefine()) return RefineIndex(p_folderPath);
|
||||
|
||||
std::ofstream loaderFile(loaderFilePath);
|
||||
if (!loaderFile.is_open())
|
||||
if (m_pMetadata != nullptr)
|
||||
{
|
||||
return ErrorCode::FailedCreateFile;
|
||||
ErrorCode ret = m_pMetadata->SaveMetadata(folderPath + m_sMetadataFile, folderPath + m_sMetadataIndexFile);
|
||||
if (ErrorCode::Success != ret) return ret;
|
||||
}
|
||||
|
||||
if (nullptr != m_pMetadata)
|
||||
{
|
||||
std::string metadataFile = "metadata.bin";
|
||||
std::string metadataIndexFile = "metadataIndex.bin";
|
||||
loaderFile << "[MetaData]" << std::endl;
|
||||
loaderFile << "MetaDataFilePath=" << metadataFile << std::endl;
|
||||
loaderFile << "MetaDataIndexPath=" << metadataIndexFile << std::endl;
|
||||
loaderFile << std::endl;
|
||||
|
||||
m_pMetadata->SaveMetadata(folderPath + metadataFile, folderPath + metadataIndexFile);
|
||||
}
|
||||
|
||||
loaderFile << "[Index]" << std::endl;
|
||||
loaderFile << "IndexAlgoType=" << Helper::Convert::ConvertToString(GetIndexAlgoType()) << std::endl;
|
||||
loaderFile << "ValueType=" << Helper::Convert::ConvertToString(GetVectorValueType()) << std::endl;
|
||||
loaderFile << std::endl;
|
||||
|
||||
ErrorCode ret = SaveIndex(folderPath, loaderFile);
|
||||
loaderFile.close();
|
||||
return ret;
|
||||
return SaveIndexData(folderPath);
|
||||
}
|
||||
|
||||
ErrorCode
|
||||
VectorIndex::BuildIndex(std::shared_ptr<VectorSet> p_vectorSet,
|
||||
std::shared_ptr<MetadataSet> p_metadataSet)
|
||||
std::shared_ptr<MetadataSet> p_metadataSet, bool p_withMetaIndex)
|
||||
{
|
||||
if (nullptr == p_vectorSet || p_vectorSet->Count() == 0 || p_vectorSet->Dimension() == 0 || p_vectorSet->GetValueType() != GetVectorValueType())
|
||||
{
|
||||
@ -152,13 +256,17 @@ VectorIndex::BuildIndex(std::shared_ptr<VectorSet> p_vectorSet,
|
||||
|
||||
BuildIndex(p_vectorSet->GetData(), p_vectorSet->Count(), p_vectorSet->Dimension());
|
||||
m_pMetadata = std::move(p_metadataSet);
|
||||
if (p_withMetaIndex && m_pMetadata != nullptr)
|
||||
{
|
||||
BuildMetaMapping();
|
||||
}
|
||||
return ErrorCode::Success;
|
||||
}
|
||||
|
||||
|
||||
ErrorCode
|
||||
VectorIndex::SearchIndex(const void* p_vector, int p_neighborCount, std::vector<BasicResult>& p_results) const {
|
||||
QueryResult res(p_vector, p_neighborCount, p_results);
|
||||
VectorIndex::SearchIndex(const void* p_vector, int p_neighborCount, bool p_withMeta, BasicResult* p_results) const {
|
||||
QueryResult res(p_vector, p_neighborCount, p_withMeta, p_results);
|
||||
SearchIndex(res);
|
||||
return ErrorCode::Success;
|
||||
}
|
||||
@ -170,17 +278,54 @@ VectorIndex::AddIndex(std::shared_ptr<VectorSet> p_vectorSet, std::shared_ptr<Me
|
||||
{
|
||||
return ErrorCode::Fail;
|
||||
}
|
||||
AddIndex(p_vectorSet->GetData(), p_vectorSet->Count(), p_vectorSet->Dimension());
|
||||
|
||||
SizeType currStart;
|
||||
ErrorCode ret = AddIndex(p_vectorSet->GetData(), p_vectorSet->Count(), p_vectorSet->Dimension(), &currStart);
|
||||
if (ret != ErrorCode::Success) return ret;
|
||||
|
||||
if (m_pMetadata == nullptr) {
|
||||
m_pMetadata = std::move(p_metadataSet);
|
||||
if (currStart == 0)
|
||||
m_pMetadata = std::move(p_metadataSet);
|
||||
else
|
||||
return ErrorCode::Success;
|
||||
}
|
||||
else {
|
||||
m_pMetadata->AddBatch(*p_metadataSet);
|
||||
}
|
||||
|
||||
if (m_pMetaToVec != nullptr) {
|
||||
for (SizeType i = 0; i < p_vectorSet->Count(); i++) {
|
||||
ByteArray meta = m_pMetadata->GetMetadata(currStart + i);
|
||||
DeleteIndex(meta);
|
||||
m_pMetaToVec->emplace(std::string((char*)meta.Data(), meta.Length()), currStart + i);
|
||||
}
|
||||
}
|
||||
return ErrorCode::Success;
|
||||
}
|
||||
|
||||
|
||||
// Deletes the vector registered under the given metadata bytes.
// Returns Fail when no metadata-to-vector map has been built; otherwise
// Success, whether or not a matching entry was found.
ErrorCode
VectorIndex::DeleteIndex(ByteArray p_meta) {
    if (nullptr == m_pMetaToVec)
    {
        return ErrorCode::Fail;
    }

    const std::string key(reinterpret_cast<const char*>(p_meta.Data()), p_meta.Length());
    const auto hit = m_pMetaToVec->find(key);
    if (hit == m_pMetaToVec->end())
    {
        return ErrorCode::Success;
    }

    DeleteIndex(hit->second);
    return ErrorCode::Success;
}
|
||||
|
||||
|
||||
// Looks up the vector registered under the given metadata bytes.
// Returns nullptr when no metadata-to-vector map exists or the metadata is
// not in it; otherwise forwards to GetSample() with the mapped vector id.
const void* VectorIndex::GetSample(ByteArray p_meta)
{
    if (nullptr == m_pMetaToVec)
    {
        return nullptr;
    }

    const std::string key(reinterpret_cast<const char*>(p_meta.Data()), p_meta.Length());
    const auto hit = m_pMetaToVec->find(key);
    if (hit == m_pMetaToVec->end())
    {
        return nullptr;
    }
    return GetSample(hit->second);
}
|
||||
|
||||
|
||||
std::shared_ptr<VectorIndex>
|
||||
VectorIndex::CreateInstance(IndexAlgoType p_algo, VectorValueType p_valuetype)
|
||||
{
|
||||
@ -223,100 +368,61 @@ ErrorCode
|
||||
VectorIndex::LoadIndex(const std::string& p_loaderFilePath, std::shared_ptr<VectorIndex>& p_vectorIndex)
|
||||
{
|
||||
Helper::IniReader iniReader;
|
||||
|
||||
if (ErrorCode::Success != iniReader.LoadIniFile(p_loaderFilePath + "/indexloader.ini"))
|
||||
{
|
||||
return ErrorCode::FailedOpenFile;
|
||||
}
|
||||
if (ErrorCode::Success != iniReader.LoadIniFile(p_loaderFilePath + "/indexloader.ini")) return ErrorCode::FailedOpenFile;
|
||||
|
||||
IndexAlgoType algoType = iniReader.GetParameter("Index", "IndexAlgoType", IndexAlgoType::Undefined);
|
||||
VectorValueType valueType = iniReader.GetParameter("Index", "ValueType", VectorValueType::Undefined);
|
||||
if (IndexAlgoType::Undefined == algoType || VectorValueType::Undefined == valueType)
|
||||
{
|
||||
return ErrorCode::Fail;
|
||||
}
|
||||
|
||||
if (algoType == IndexAlgoType::BKT) {
|
||||
switch (valueType)
|
||||
{
|
||||
#define DefineVectorValueType(Name, Type) \
|
||||
case VectorValueType::Name: \
|
||||
p_vectorIndex.reset(new BKT::Index<Type>); \
|
||||
p_vectorIndex->LoadIndex(p_loaderFilePath); \
|
||||
break; \
|
||||
p_vectorIndex = CreateInstance(algoType, valueType);
|
||||
if (p_vectorIndex == nullptr) return ErrorCode::FailedParseValue;
|
||||
|
||||
#include "inc/Core/DefinitionList.h"
|
||||
#undef DefineVectorValueType
|
||||
|
||||
default: break;
|
||||
}
|
||||
}
|
||||
else if (algoType == IndexAlgoType::KDT) {
|
||||
switch (valueType)
|
||||
{
|
||||
#define DefineVectorValueType(Name, Type) \
|
||||
case VectorValueType::Name: \
|
||||
p_vectorIndex.reset(new KDT::Index<Type>); \
|
||||
p_vectorIndex->LoadIndex(p_loaderFilePath); \
|
||||
break; \
|
||||
|
||||
#include "inc/Core/DefinitionList.h"
|
||||
#undef DefineVectorValueType
|
||||
|
||||
default: break;
|
||||
}
|
||||
}
|
||||
return ErrorCode::Success;
|
||||
return p_vectorIndex->LoadIndex(p_loaderFilePath);
|
||||
}
|
||||
|
||||
|
||||
ErrorCode VectorIndex::MergeIndex(const char* p_indexFilePath1, const char* p_indexFilePath2)
|
||||
|
||||
// Factory-style loader: parses p_config to find the algorithm and value type,
// creates the matching index in p_vectorIndex and lets it load itself from
// the configuration text and blobs.
// Returns FailedParseValue when the config cannot be parsed or no index can
// be created for the configured types.
ErrorCode
VectorIndex::LoadIndex(const std::string& p_config, const std::vector<ByteArray>& p_indexBlobs, std::shared_ptr<VectorIndex>& p_vectorIndex)
{
    SPTAG::Helper::IniReader iniReader;
    std::istringstream configText(p_config);
    const bool parsed = (SPTAG::ErrorCode::Success == iniReader.LoadIni(configText));
    if (!parsed)
    {
        return ErrorCode::FailedParseValue;
    }

    const IndexAlgoType algo = iniReader.GetParameter("Index", "IndexAlgoType", IndexAlgoType::Undefined);
    const VectorValueType value = iniReader.GetParameter("Index", "ValueType", VectorValueType::Undefined);

    p_vectorIndex = CreateInstance(algo, value);
    if (nullptr == p_vectorIndex)
    {
        return ErrorCode::FailedParseValue;
    }
    return p_vectorIndex->LoadIndex(p_config, p_indexBlobs);
}
|
||||
|
||||
|
||||
ErrorCode
|
||||
VectorIndex::MergeIndex(const char* p_indexFilePath1, const char* p_indexFilePath2)
|
||||
{
|
||||
std::string folderPath1(p_indexFilePath1), folderPath2(p_indexFilePath2);
|
||||
if (!folderPath1.empty() && *(folderPath1.rbegin()) != FolderSep) folderPath1 += FolderSep;
|
||||
if (!folderPath2.empty() && *(folderPath2.rbegin()) != FolderSep) folderPath2 += FolderSep;
|
||||
|
||||
Helper::IniReader p_configReader1, p_configReader2;
|
||||
if (ErrorCode::Success != p_configReader1.LoadIniFile(folderPath1 + "/indexloader.ini"))
|
||||
return ErrorCode::FailedOpenFile;
|
||||
std::shared_ptr<VectorIndex> index1, index2;
|
||||
LoadIndex(folderPath1, index1);
|
||||
LoadIndex(folderPath2, index2);
|
||||
|
||||
if (ErrorCode::Success != p_configReader2.LoadIniFile(folderPath2 + "/indexloader.ini"))
|
||||
return ErrorCode::FailedOpenFile;
|
||||
std::shared_ptr<VectorSet> p_vectorSet;
|
||||
std::shared_ptr<MetadataSet> p_metaSet;
|
||||
size_t vectorSize = GetValueTypeSize(index2->GetVectorValueType()) * index2->GetFeatureDim();
|
||||
std::uint64_t offsets[2] = { 0 };
|
||||
ByteArray metaoffset((std::uint8_t*)offsets, 2 * sizeof(std::uint64_t), false);
|
||||
for (SizeType i = 0; i < index2->GetNumSamples(); i++)
|
||||
if (index2->ContainSample(i))
|
||||
{
|
||||
p_vectorSet.reset(new BasicVectorSet(ByteArray((std::uint8_t*)index2->GetSample(i), vectorSize, false),
|
||||
index2->GetVectorValueType(), index2->GetFeatureDim(), 1));
|
||||
ByteArray meta = index2->GetMetadata(i);
|
||||
offsets[1] = meta.Length();
|
||||
p_metaSet.reset(new MemMetadataSet(meta, metaoffset, 1));
|
||||
index1->AddIndex(p_vectorSet, p_metaSet);
|
||||
}
|
||||
|
||||
std::shared_ptr<VectorIndex> index = CreateInstance(
|
||||
p_configReader1.GetParameter("Index", "IndexAlgoType", IndexAlgoType::Undefined),
|
||||
p_configReader1.GetParameter("Index", "ValueType", VectorValueType::Undefined));
|
||||
if (index == nullptr) return ErrorCode::FailedParseValue;
|
||||
|
||||
std::string empty("");
|
||||
if (!COMMON::DataUtils::MergeIndex(folderPath1 + p_configReader1.GetParameter("Index", "VectorFilePath", empty),
|
||||
folderPath1 + p_configReader1.GetParameter("MetaData", "MetaDataFilePath", empty),
|
||||
folderPath1 + p_configReader1.GetParameter("MetaData", "MetaDataIndexPath", empty),
|
||||
folderPath2 + p_configReader1.GetParameter("Index", "VectorFilePath", empty),
|
||||
folderPath2 + p_configReader1.GetParameter("MetaData", "MetaDataFilePath", empty),
|
||||
folderPath2 + p_configReader1.GetParameter("MetaData", "MetaDataIndexPath", empty)))
|
||||
return ErrorCode::Fail;
|
||||
|
||||
for (const auto& iter : p_configReader1.GetParameters("Index"))
|
||||
index->SetParameter(iter.first.c_str(), iter.second.c_str());
|
||||
|
||||
if (p_configReader1.DoesSectionExist("MetaData"))
|
||||
{
|
||||
for (const auto& iter : p_configReader1.GetParameters("MetaData"))
|
||||
index->SetParameter(iter.first.c_str(), iter.second.c_str());
|
||||
index->SetMetadata(folderPath1 + p_configReader1.GetParameter("MetaData", "MetaDataFilePath", empty),
|
||||
folderPath1 + p_configReader1.GetParameter("MetaData", "MetaDataIndexPath", empty));
|
||||
}
|
||||
|
||||
std::ifstream vecIn(folderPath1 + p_configReader1.GetParameter("Index", "VectorFilePath", empty), std::ios::binary);
|
||||
int R, C;
|
||||
vecIn.read((char*)&R, sizeof(int));
|
||||
vecIn.read((char*)&C, sizeof(int));
|
||||
size_t size = R * C * GetValueTypeSize(index->GetVectorValueType());
|
||||
char* data = new char[size];
|
||||
vecIn.read(data, size);
|
||||
vecIn.close();
|
||||
index->BuildIndex((void*)data, R, C);
|
||||
index->SaveIndex(folderPath1);
|
||||
index1->SaveIndex(folderPath1);
|
||||
return ErrorCode::Success;
|
||||
}
|
||||
}
|
||||
|
@ -19,7 +19,7 @@ VectorSet::~VectorSet()
|
||||
|
||||
BasicVectorSet::BasicVectorSet(const ByteArray& p_bytesArray,
|
||||
VectorValueType p_valueType,
|
||||
SizeType p_dimension,
|
||||
DimensionType p_dimension,
|
||||
SizeType p_vectorCount)
|
||||
: m_data(p_bytesArray),
|
||||
m_valueType(p_valueType),
|
||||
@ -43,15 +43,14 @@ BasicVectorSet::GetValueType() const
|
||||
|
||||
|
||||
void*
|
||||
BasicVectorSet::GetVector(IndexType p_vectorID) const
|
||||
BasicVectorSet::GetVector(SizeType p_vectorID) const
|
||||
{
|
||||
if (p_vectorID < 0 || static_cast<SizeType>(p_vectorID) >= m_vectorCount)
|
||||
if (p_vectorID < 0 || p_vectorID >= m_vectorCount)
|
||||
{
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
SizeType offset = static_cast<SizeType>(p_vectorID) * m_perVectorDataSize;
|
||||
return reinterpret_cast<void*>(m_data.Data() + offset);
|
||||
return reinterpret_cast<void*>(m_data.Data() + ((size_t)p_vectorID) * m_perVectorDataSize);
|
||||
}
|
||||
|
||||
|
||||
@ -61,7 +60,7 @@ BasicVectorSet::GetData() const
|
||||
return reinterpret_cast<void*>(m_data.Data());
|
||||
}
|
||||
|
||||
SizeType
|
||||
DimensionType
|
||||
BasicVectorSet::Dimension() const
|
||||
{
|
||||
return m_dimension;
|
||||
@ -88,8 +87,8 @@ BasicVectorSet::Save(const std::string& p_vectorFile) const
|
||||
FILE * fp = fopen(p_vectorFile.c_str(), "wb");
|
||||
if (fp == NULL) return ErrorCode::FailedOpenFile;
|
||||
|
||||
fwrite(&m_vectorCount, sizeof(int), 1, fp);
|
||||
fwrite(&m_dimension, sizeof(int), 1, fp);
|
||||
fwrite(&m_vectorCount, sizeof(SizeType), 1, fp);
|
||||
fwrite(&m_dimension, sizeof(DimensionType), 1, fp);
|
||||
|
||||
fwrite((const void*)(m_data.Data()), m_data.Length(), 1, fp);
|
||||
fclose(fp);
|
||||
|
@ -25,15 +25,8 @@ IniReader::~IniReader()
|
||||
}
|
||||
|
||||
|
||||
ErrorCode
|
||||
IniReader::LoadIniFile(const std::string& p_iniFilePath)
|
||||
ErrorCode IniReader::LoadIni(std::istream& p_input)
|
||||
{
|
||||
std::ifstream input(p_iniFilePath);
|
||||
if (!input.is_open())
|
||||
{
|
||||
return ErrorCode::FailedOpenFile;
|
||||
}
|
||||
|
||||
const std::size_t c_bufferSize = 1 << 16;
|
||||
|
||||
std::unique_ptr<char[]> line(new char[c_bufferSize]);
|
||||
@ -51,9 +44,9 @@ IniReader::LoadIniFile(const std::string& p_iniFilePath)
|
||||
return std::isspace(p_ch) != 0;
|
||||
};
|
||||
|
||||
while (!input.eof())
|
||||
while (!p_input.eof())
|
||||
{
|
||||
if (!input.getline(line.get(), c_bufferSize))
|
||||
if (!p_input.getline(line.get(), c_bufferSize))
|
||||
{
|
||||
break;
|
||||
}
|
||||
@ -141,11 +134,21 @@ IniReader::LoadIniFile(const std::string& p_iniFilePath)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return ErrorCode::Success;
|
||||
}
|
||||
|
||||
|
||||
// Opens the ini file at p_iniFilePath and parses it with LoadIni().
// Returns FailedOpenFile when the file cannot be opened, otherwise whatever
// LoadIni() reports.
ErrorCode
IniReader::LoadIniFile(const std::string& p_iniFilePath)
{
    std::ifstream iniFile(p_iniFilePath);
    if (iniFile.is_open())
    {
        const ErrorCode status = LoadIni(iniFile);
        iniFile.close();
        return status;
    }
    return ErrorCode::FailedOpenFile;
}
|
||||
|
||||
|
||||
bool
|
||||
IniReader::DoesSectionExist(const std::string& p_section) const
|
||||
{
|
||||
|
44
core/src/index/thirdparty/SPTAG/AnnService/src/Helper/VectorSetReader.cpp
vendored
Normal file
44
core/src/index/thirdparty/SPTAG/AnnService/src/Helper/VectorSetReader.cpp
vendored
Normal file
@ -0,0 +1,44 @@
|
||||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
#include "inc/Helper/VectorSetReader.h"
|
||||
#include "inc/Helper/VectorSetReaders/DefaultReader.h"
|
||||
|
||||
|
||||
using namespace SPTAG;
|
||||
using namespace SPTAG::Helper;
|
||||
|
||||
|
||||
// Stores the supplied defaults and registers the command-line options that
// can override them: thread count and delimiter as optional options,
// dimension and vector type as required ones.
ReaderOptions::ReaderOptions(VectorValueType p_valueType, DimensionType p_dimension, std::string p_vectorDelimiter, std::uint32_t p_threadNum)
    // The delimiter is taken by value, so move it into the member rather than copying it again.
    : m_threadNum(p_threadNum), m_dimension(p_dimension), m_vectorDelimiter(std::move(p_vectorDelimiter)), m_inputValueType(p_valueType)
{
    AddOptionalOption(m_threadNum, "-t", "--thread", "Thread Number.");
    AddOptionalOption(m_vectorDelimiter, "", "--delimiter", "Vector delimiter.");
    AddRequiredOption(m_dimension, "-d", "--dimension", "Dimension of vector.");
    AddRequiredOption(m_inputValueType, "-v", "--vectortype", "Input vector data type. Default is float.");
}
|
||||
|
||||
|
||||
// Nothing to release beyond what the members clean up themselves.
ReaderOptions::~ReaderOptions() = default;
|
||||
|
||||
|
||||
// Keeps a shared reference to the reader options for use by the reader.
VectorSetReader::VectorSetReader(std::shared_ptr<ReaderOptions> p_options)
    // The pointer arrives by value; moving it avoids an extra reference-count round trip.
    : m_options(std::move(p_options))
{
}
|
||||
|
||||
|
||||
// Nothing to release beyond what the members clean up themselves.
VectorSetReader::~VectorSetReader() = default;
|
||||
|
||||
|
||||
std::shared_ptr<VectorSetReader>
|
||||
VectorSetReader::CreateInstance(std::shared_ptr<ReaderOptions> p_options)
|
||||
{
|
||||
return std::shared_ptr<VectorSetReader>(new DefaultReader(std::move(p_options)));
|
||||
}
|
||||
|
||||
|
@ -1,17 +1,17 @@
|
||||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
#include "inc/IndexBuilder/VectorSetReaders/DefaultReader.h"
|
||||
#include "inc/Helper/VectorSetReaders/DefaultReader.h"
|
||||
#include "inc/Helper/StringConvert.h"
|
||||
#include "inc/Helper/CommonHelper.h"
|
||||
#include "inc/IndexBuilder/ThreadPool.h"
|
||||
|
||||
#include <fstream>
|
||||
#include <sstream>
|
||||
#include <iostream>
|
||||
#include <omp.h>
|
||||
|
||||
using namespace SPTAG;
|
||||
using namespace SPTAG::IndexBuilder;
|
||||
using namespace SPTAG::Helper;
|
||||
|
||||
namespace
|
||||
{
|
||||
@ -139,10 +139,13 @@ private:
|
||||
} // namespace Local
|
||||
} // namespace
|
||||
|
||||
DefaultReader::DefaultReader(std::shared_ptr<BuilderOptions> p_options)
|
||||
|
||||
DefaultReader::DefaultReader(std::shared_ptr<ReaderOptions> p_options)
|
||||
: VectorSetReader(std::move(p_options)),
|
||||
m_subTaskBlocksize(0)
|
||||
m_subTaskBlocksize(0)
|
||||
{
|
||||
omp_set_num_threads(m_options->m_threadNum);
|
||||
|
||||
std::string tempFolder("tempfolder");
|
||||
if (!direxists(tempFolder.c_str()))
|
||||
{
|
||||
@ -180,7 +183,7 @@ DefaultReader::LoadFile(const std::string& p_filePaths)
|
||||
{
|
||||
const auto& files = GetFileSizes(p_filePaths);
|
||||
std::vector<std::function<void()>> subWorks;
|
||||
subWorks.reserve(files.size() * ThreadPool::CurrentThreadNum());
|
||||
subWorks.reserve(files.size() * m_options->m_threadNum);
|
||||
|
||||
m_subTaskCount = 0;
|
||||
for (const auto& fileInfo : files)
|
||||
@ -197,7 +200,7 @@ DefaultReader::LoadFile(const std::string& p_filePaths)
|
||||
std::size_t blockSize = m_subTaskBlocksize;
|
||||
if (0 == blockSize)
|
||||
{
|
||||
fileTaskCount = ThreadPool::CurrentThreadNum();
|
||||
fileTaskCount = m_options->m_threadNum;
|
||||
blockSize = (fileInfo.second + fileTaskCount - 1) / fileTaskCount;
|
||||
}
|
||||
else
|
||||
@ -223,9 +226,10 @@ DefaultReader::LoadFile(const std::string& p_filePaths)
|
||||
|
||||
m_waitSignal.Reset(m_subTaskCount);
|
||||
|
||||
for (auto& workItem : subWorks)
|
||||
#pragma omp parallel for schedule(dynamic)
|
||||
for (int64_t i = 0; i < (int64_t)subWorks.size(); i++)
|
||||
{
|
||||
ThreadPool::Queue(std::move(workItem));
|
||||
subWorks[i]();
|
||||
}
|
||||
|
||||
m_waitSignal.Wait();
|
||||
@ -244,7 +248,7 @@ DefaultReader::GetVectorSet() const
|
||||
|
||||
std::ifstream inputStream;
|
||||
inputStream.open(m_vectorOutput, std::ifstream::binary);
|
||||
inputStream.seekg(sizeof(uint32_t) + sizeof(uint32_t), std::ifstream::beg);
|
||||
inputStream.seekg(sizeof(SizeType) + sizeof(DimensionType), std::ifstream::beg);
|
||||
inputStream.read(vecBuf, m_totalRecordVectorBytes);
|
||||
inputStream.close();
|
||||
|
||||
@ -276,7 +280,7 @@ DefaultReader::LoadFileInternal(const std::string& p_filePath,
|
||||
std::ofstream metaStreamContent;
|
||||
std::ofstream metaStreamIndex;
|
||||
|
||||
std::uint32_t recordCount = 0;
|
||||
SizeType recordCount = 0;
|
||||
std::uint64_t metaOffset = 0;
|
||||
std::size_t totalRead = 0;
|
||||
std::streamoff startpos = p_fileBlockID * p_fileBlockSize;
|
||||
@ -400,12 +404,12 @@ DefaultReader::MergeData()
|
||||
std::unique_ptr<char[]> bufferHolder(new char[bufferSize]);
|
||||
char* buf = bufferHolder.get();
|
||||
|
||||
std::uint32_t uint32Var = m_totalRecordCount;
|
||||
SizeType totalRecordCount = m_totalRecordCount;
|
||||
|
||||
outputStream.open(m_vectorOutput, std::ofstream::binary);
|
||||
|
||||
outputStream.write(reinterpret_cast<char*>(&uint32Var), sizeof(uint32Var));
|
||||
outputStream.write(reinterpret_cast<char*>(&(m_options->m_dimension)), sizeof(m_options->m_dimension));
|
||||
outputStream.write(reinterpret_cast<char*>(&totalRecordCount), sizeof(totalRecordCount));
|
||||
outputStream.write(reinterpret_cast<char*>(&(m_options->m_dimension)), sizeof(m_options->m_dimension));
|
||||
|
||||
for (std::uint32_t i = 0; i < m_subTaskCount; ++i)
|
||||
{
|
||||
@ -442,7 +446,7 @@ DefaultReader::MergeData()
|
||||
|
||||
outputStream.open(m_metadataIndexOutput, std::ofstream::binary);
|
||||
|
||||
outputStream.write(reinterpret_cast<char*>(&uint32Var), sizeof(uint32Var));
|
||||
outputStream.write(reinterpret_cast<char*>(&totalRecordCount), sizeof(totalRecordCount));
|
||||
|
||||
std::uint64_t totalOffset = 0;
|
||||
for (std::uint32_t i = 0; i < m_subTaskCount; ++i)
|
||||
@ -453,18 +457,18 @@ DefaultReader::MergeData()
|
||||
file += ".tmp";
|
||||
|
||||
inputStream.open(file, std::ifstream::binary);
|
||||
for (std::uint32_t remains = m_subTaskRecordCount[i]; remains > 0;)
|
||||
for (SizeType remains = m_subTaskRecordCount[i]; remains > 0;)
|
||||
{
|
||||
std::size_t readBytesCount = min(remains * sizeof(std::uint64_t), bufferSizeTrim64);
|
||||
inputStream.read(buf, readBytesCount);
|
||||
std::uint64_t* offset = reinterpret_cast<std::uint64_t*>(buf);
|
||||
for (std::uint32_t i = 0; i < readBytesCount / sizeof(std::uint64_t); ++i)
|
||||
for (std::uint64_t i = 0; i < readBytesCount / sizeof(std::uint64_t); ++i)
|
||||
{
|
||||
offset[i] += totalOffset;
|
||||
}
|
||||
|
||||
outputStream.write(buf, readBytesCount);
|
||||
remains -= static_cast<std::uint32_t>(readBytesCount / sizeof(std::uint64_t));
|
||||
remains -= static_cast<SizeType>(readBytesCount / sizeof(std::uint64_t));
|
||||
}
|
||||
|
||||
inputStream.read(buf, sizeof(std::uint64_t));
|
@ -11,14 +11,8 @@ using namespace SPTAG::IndexBuilder;
|
||||
|
||||
|
||||
BuilderOptions::BuilderOptions()
|
||||
: m_threadNum(32),
|
||||
m_inputValueType(VectorValueType::Float),
|
||||
m_vectorDelimiter("|")
|
||||
: Helper::ReaderOptions(VectorValueType::Float, 0, "|", 32)
|
||||
{
|
||||
AddOptionalOption(m_threadNum, "-t", "--thread", "Thread Number.");
|
||||
AddOptionalOption(m_vectorDelimiter, "", "--delimiter", "Vector delimiter.");
|
||||
AddRequiredOption(m_dimension, "-d", "--dimension", "Dimension of vector.");
|
||||
AddRequiredOption(m_inputValueType, "-v", "--vectortype", "Input vector data type. Default is float.");
|
||||
AddRequiredOption(m_inputFiles, "-i", "--input", "Input raw data.");
|
||||
AddRequiredOption(m_outputFolder, "-o", "--outputfolder", "Output folder.");
|
||||
AddRequiredOption(m_indexAlgoType, "-a", "--algo", "Index Algorithm type.");
|
||||
|
@ -1,27 +0,0 @@
|
||||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
#include "inc/IndexBuilder/VectorSetReader.h"
|
||||
#include "inc/IndexBuilder/VectorSetReaders/DefaultReader.h"
|
||||
|
||||
|
||||
using namespace SPTAG;
|
||||
using namespace SPTAG::IndexBuilder;
|
||||
|
||||
VectorSetReader::VectorSetReader(std::shared_ptr<BuilderOptions> p_options)
|
||||
: m_options(p_options)
|
||||
{
|
||||
}
|
||||
|
||||
|
||||
VectorSetReader:: ~VectorSetReader()
|
||||
{
|
||||
}
|
||||
|
||||
|
||||
std::shared_ptr<VectorSetReader>
|
||||
VectorSetReader::CreateInstance(std::shared_ptr<BuilderOptions> p_options)
|
||||
{
|
||||
return std::shared_ptr<VectorSetReader>(new DefaultReader(std::move(p_options)));
|
||||
}
|
||||
|
@ -1,9 +1,8 @@
|
||||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
#include "inc/IndexBuilder/ThreadPool.h"
|
||||
#include "inc/IndexBuilder/Options.h"
|
||||
#include "inc/IndexBuilder/VectorSetReader.h"
|
||||
#include "inc/Helper/VectorSetReader.h"
|
||||
#include "inc/Core/VectorIndex.h"
|
||||
#include "inc/Core/Common.h"
|
||||
#include "inc/Helper/SimpleIniReader.h"
|
||||
@ -20,7 +19,7 @@ int main(int argc, char* argv[])
|
||||
{
|
||||
exit(1);
|
||||
}
|
||||
IndexBuilder::ThreadPool::Init(options->m_threadNum);
|
||||
|
||||
auto indexBuilder = VectorIndex::CreateInstance(options->m_indexAlgoType, options->m_inputValueType);
|
||||
|
||||
Helper::IniReader iniReader;
|
||||
@ -32,14 +31,14 @@ int main(int argc, char* argv[])
|
||||
for (int i = 1; i < argc; i++)
|
||||
{
|
||||
std::string param(argv[i]);
|
||||
int idx = (int)param.find("=");
|
||||
if (idx < 0) continue;
|
||||
size_t idx = param.find("=");
|
||||
if (idx == std::string::npos) continue;
|
||||
|
||||
std::string paramName = param.substr(0, idx);
|
||||
std::string paramVal = param.substr(idx + 1);
|
||||
std::string sectionName;
|
||||
idx = (int)paramName.find(".");
|
||||
if (idx >= 0) {
|
||||
idx = paramName.find(".");
|
||||
if (idx != std::string::npos) {
|
||||
sectionName = paramName.substr(0, idx);
|
||||
paramName = paramName.substr(idx + 1);
|
||||
}
|
||||
@ -63,9 +62,10 @@ int main(int argc, char* argv[])
|
||||
fprintf(stderr, "Failed to read input file.\n");
|
||||
exit(1);
|
||||
}
|
||||
int row, col;
|
||||
inputStream.read((char*)&row, sizeof(int));
|
||||
inputStream.read((char*)&col, sizeof(int));
|
||||
SizeType row;
|
||||
DimensionType col;
|
||||
inputStream.read((char*)&row, sizeof(SizeType));
|
||||
inputStream.read((char*)&col, sizeof(DimensionType));
|
||||
std::uint64_t totalRecordVectorBytes = ((std::uint64_t)GetValueTypeSize(options->m_inputValueType)) * row * col;
|
||||
ByteArray vectorSet = ByteArray::Alloc(totalRecordVectorBytes);
|
||||
char* vecBuf = reinterpret_cast<char*>(vectorSet.Data());
|
||||
@ -81,7 +81,7 @@ int main(int argc, char* argv[])
|
||||
indexBuilder->SaveIndex(options->m_outputFolder);
|
||||
}
|
||||
else {
|
||||
auto vectorReader = IndexBuilder::VectorSetReader::CreateInstance(options);
|
||||
auto vectorReader = Helper::VectorSetReader::CreateInstance(options);
|
||||
if (ErrorCode::Success != vectorReader->LoadFile(options->m_inputFiles))
|
||||
{
|
||||
fprintf(stderr, "Failed to read input file.\n");
|
||||
|
@ -15,13 +15,13 @@
|
||||
using namespace SPTAG;
|
||||
|
||||
template <typename T>
|
||||
float CalcRecall(std::vector<QueryResult> &results, const std::vector<std::set<int>> &truth, int NumQuerys, int K, std::ofstream& log)
|
||||
float CalcRecall(std::vector<QueryResult> &results, const std::vector<std::set<SizeType>> &truth, SizeType NumQuerys, int K, std::ofstream& log)
|
||||
{
|
||||
float meanrecall = 0, minrecall = MaxDist, maxrecall = 0, stdrecall = 0;
|
||||
std::vector<float> thisrecall(NumQuerys, 0);
|
||||
for (int i = 0; i < NumQuerys; i++)
|
||||
for (SizeType i = 0; i < NumQuerys; i++)
|
||||
{
|
||||
for (int id : truth[i])
|
||||
for (SizeType id : truth[i])
|
||||
{
|
||||
for (int j = 0; j < K; j++)
|
||||
{
|
||||
@ -38,7 +38,7 @@ float CalcRecall(std::vector<QueryResult> &results, const std::vector<std::set<i
|
||||
if (thisrecall[i] > maxrecall) maxrecall = thisrecall[i];
|
||||
}
|
||||
meanrecall /= NumQuerys;
|
||||
for (int i = 0; i < NumQuerys; i++)
|
||||
for (SizeType i = 0; i < NumQuerys; i++)
|
||||
{
|
||||
stdrecall += (thisrecall[i] - meanrecall) * (thisrecall[i] - meanrecall);
|
||||
}
|
||||
@ -47,11 +47,11 @@ float CalcRecall(std::vector<QueryResult> &results, const std::vector<std::set<i
|
||||
return meanrecall;
|
||||
}
|
||||
|
||||
void LoadTruth(std::ifstream& fp, std::vector<std::set<int>>& truth, int NumQuerys, int K)
|
||||
void LoadTruth(std::ifstream& fp, std::vector<std::set<SizeType>>& truth, SizeType NumQuerys, int K)
|
||||
{
|
||||
int get;
|
||||
SizeType get;
|
||||
std::string line;
|
||||
for (int i = 0; i < NumQuerys; ++i)
|
||||
for (SizeType i = 0; i < NumQuerys; ++i)
|
||||
{
|
||||
truth[i].clear();
|
||||
for (int j = 0; j < K; ++j)
|
||||
@ -70,8 +70,8 @@ int Process(Helper::IniReader& reader, VectorIndex& index)
|
||||
std::string truthFile = reader.GetParameter("Index", "TruthFile", std::string("truth.txt"));
|
||||
std::string outputFile = reader.GetParameter("Index", "ResultFile", std::string(""));
|
||||
|
||||
int numBatchQuerys = reader.GetParameter("Index", "NumBatchQuerys", 10000);
|
||||
int numDebugQuerys = reader.GetParameter("Index", "NumDebugQuerys", -1);
|
||||
SizeType numBatchQuerys = reader.GetParameter("Index", "NumBatchQuerys", (SizeType)10000);
|
||||
SizeType numDebugQuerys = reader.GetParameter("Index", "NumDebugQuerys", (SizeType)-1);
|
||||
int K = reader.GetParameter("Index", "K", 32);
|
||||
|
||||
std::vector<std::string> maxCheck = Helper::StrUtils::SplitString(reader.GetParameter("Index", "MaxCheck", std::string("2048")), "#");
|
||||
@ -100,13 +100,13 @@ int Process(Helper::IniReader& reader, VectorIndex& index)
|
||||
return -1;
|
||||
}
|
||||
|
||||
int numQuerys = (numDebugQuerys >= 0) ? numDebugQuerys : numBatchQuerys;
|
||||
SizeType numQuerys = (numDebugQuerys >= 0) ? numDebugQuerys : numBatchQuerys;
|
||||
|
||||
std::vector<std::vector<T>> Query(numQuerys, std::vector<T>(index.GetFeatureDim(), 0));
|
||||
std::vector<std::set<int>> truth(numQuerys);
|
||||
std::vector<std::set<SizeType>> truth(numQuerys);
|
||||
std::vector<QueryResult> results(numQuerys, QueryResult(NULL, K, 0));
|
||||
|
||||
int * latencies = new int[numQuerys + 1];
|
||||
clock_t * latencies = new clock_t[numQuerys + 1];
|
||||
|
||||
int base = 1;
|
||||
if (index.GetDistCalcMethod() == DistCalcMethod::Cosine) {
|
||||
@ -114,7 +114,7 @@ int Process(Helper::IniReader& reader, VectorIndex& index)
|
||||
}
|
||||
int basesquare = base * base;
|
||||
|
||||
int dims = index.GetFeatureDim();
|
||||
DimensionType dims = index.GetFeatureDim();
|
||||
std::vector<std::string> QStrings;
|
||||
while (!inStream.eof())
|
||||
{
|
||||
@ -122,43 +122,33 @@ int Process(Helper::IniReader& reader, VectorIndex& index)
|
||||
COMMON::Utils::PrepareQuerys(inStream, QStrings, Query, numQuerys, dims, index.GetDistCalcMethod(), base);
|
||||
if (numQuerys == 0) break;
|
||||
|
||||
for (int i = 0; i < numQuerys; i++) results[i].SetTarget(Query[i].data());
|
||||
for (SizeType i = 0; i < numQuerys; i++) results[i].SetTarget(Query[i].data());
|
||||
if (ftruth.is_open()) LoadTruth(ftruth, truth, numQuerys, K);
|
||||
|
||||
std::cout << " \t[avg] \t[99%] \t[95%] \t[recall] \t[mem]" << std::endl;
|
||||
|
||||
int subSize = (numQuerys - 1) / index.GetNumThreads() + 1;
|
||||
SizeType subSize = (numQuerys - 1) / omp_get_num_threads() + 1;
|
||||
for (std::string& mc : maxCheck)
|
||||
{
|
||||
index.SetParameter("MaxCheck", mc.c_str());
|
||||
for (int i = 0; i < numQuerys; i++) results[i].Reset();
|
||||
for (SizeType i = 0; i < numQuerys; i++) results[i].Reset();
|
||||
|
||||
if (index.GetNumThreads() == 1)
|
||||
#pragma omp parallel for
|
||||
for (int tid = 0; tid < omp_get_num_threads(); tid++)
|
||||
{
|
||||
for (int i = 0; i < numQuerys; i++)
|
||||
SizeType start = tid * subSize;
|
||||
SizeType end = min((tid + 1) * subSize, numQuerys);
|
||||
for (SizeType i = start; i < end; i++)
|
||||
{
|
||||
latencies[i] = clock();
|
||||
index.SearchIndex(results[i]);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
#pragma omp parallel for
|
||||
for (int tid = 0; tid < index.GetNumThreads(); tid++)
|
||||
{
|
||||
int start = tid * subSize;
|
||||
int end = min((tid + 1) * subSize, numQuerys);
|
||||
for (int i = start; i < end; i++)
|
||||
{
|
||||
latencies[i] = clock();
|
||||
index.SearchIndex(results[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
latencies[numQuerys] = clock();
|
||||
|
||||
float timeMean = 0, timeMin = MaxDist, timeMax = 0, timeStd = 0;
|
||||
for (int i = 0; i < numQuerys; i++)
|
||||
for (SizeType i = 0; i < numQuerys; i++)
|
||||
{
|
||||
if (latencies[i + 1] >= latencies[i])
|
||||
latencies[i] = latencies[i + 1] - latencies[i];
|
||||
@ -169,16 +159,16 @@ int Process(Helper::IniReader& reader, VectorIndex& index)
|
||||
if (latencies[i] < timeMin) timeMin = (float)latencies[i];
|
||||
}
|
||||
timeMean /= numQuerys;
|
||||
for (int i = 0; i < numQuerys; i++) timeStd += ((float)latencies[i] - timeMean) * ((float)latencies[i] - timeMean);
|
||||
for (SizeType i = 0; i < numQuerys; i++) timeStd += ((float)latencies[i] - timeMean) * ((float)latencies[i] - timeMean);
|
||||
timeStd = std::sqrt(timeStd / numQuerys);
|
||||
log << timeMean << " " << timeStd << " " << timeMin << " " << timeMax << " ";
|
||||
|
||||
std::sort(latencies, latencies + numQuerys, [](int x, int y)
|
||||
std::sort(latencies, latencies + numQuerys, [](clock_t x, clock_t y)
|
||||
{
|
||||
return x < y;
|
||||
});
|
||||
float l99 = float(latencies[int(numQuerys * 0.99)]) / CLOCKS_PER_SEC;
|
||||
float l95 = float(latencies[int(numQuerys * 0.95)]) / CLOCKS_PER_SEC;
|
||||
float l99 = float(latencies[SizeType(numQuerys * 0.99)]) / CLOCKS_PER_SEC;
|
||||
float l95 = float(latencies[SizeType(numQuerys * 0.95)]) / CLOCKS_PER_SEC;
|
||||
|
||||
float recall = 0;
|
||||
if (ftruth.is_open())
|
||||
@ -202,7 +192,7 @@ int Process(Helper::IniReader& reader, VectorIndex& index)
|
||||
if (fp.is_open())
|
||||
{
|
||||
fp << std::setprecision(3) << std::fixed;
|
||||
for (int i = 0; i < numQuerys; i++)
|
||||
for (SizeType i = 0; i < numQuerys; i++)
|
||||
{
|
||||
fp << QStrings[i] << ":";
|
||||
for (int j = 0; j < K; j++)
|
||||
@ -258,13 +248,13 @@ int main(int argc, char** argv)
|
||||
{
|
||||
std::string param(argv[i]);
|
||||
size_t idx = param.find("=");
|
||||
if (idx < 0) continue;
|
||||
if (idx == std::string::npos) continue;
|
||||
|
||||
std::string paramName = param.substr(0, idx);
|
||||
std::string paramVal = param.substr(idx + 1);
|
||||
std::string sectionName;
|
||||
idx = paramName.find(".");
|
||||
if (idx >= 0) {
|
||||
if (idx != std::string::npos) {
|
||||
sectionName = paramName.substr(0, idx);
|
||||
paramName = paramName.substr(idx + 1);
|
||||
}
|
||||
|
@ -114,7 +114,7 @@ SearchService::Run()
|
||||
void
|
||||
SearchService::RunSocketMode()
|
||||
{
|
||||
auto threadNum = max((unsigned int)1, m_serviceContext->GetServiceSettings()->m_threadNum);
|
||||
auto threadNum = max((SizeType)1, m_serviceContext->GetServiceSettings()->m_threadNum);
|
||||
m_threadPool.reset(new boost::asio::thread_pool(threadNum));
|
||||
|
||||
Socket::PacketHandlerMapPtr handlerMap(new Socket::PacketHandlerMap);
|
||||
@ -161,7 +161,7 @@ SearchService::RunInteractiveMode()
|
||||
std::unique_ptr<char[]> inputBuffer(new char[bufferSize]);
|
||||
while (true)
|
||||
{
|
||||
fprintf(stdout, "Query: ");
|
||||
std::cout << "Query: ";
|
||||
if (!fgets(inputBuffer.get(), bufferSize, stdin))
|
||||
{
|
||||
break;
|
||||
@ -169,29 +169,28 @@ SearchService::RunInteractiveMode()
|
||||
|
||||
auto callback = [](std::shared_ptr<SearchExecutionContext> p_exeContext)
|
||||
{
|
||||
fprintf(stdout, "Result:\n");
|
||||
std::cout << "Result:" << std::endl;
|
||||
if (nullptr == p_exeContext)
|
||||
{
|
||||
fprintf(stdout, "Not Executed.\n");
|
||||
std::cout << "Not Executed." << std::endl;
|
||||
return;
|
||||
}
|
||||
|
||||
const auto& results = p_exeContext->GetResults();
|
||||
for (const auto& result : results)
|
||||
{
|
||||
fprintf(stdout, "Index: %s\n", result.m_indexName.c_str());
|
||||
std::cout << "Index: " << result.m_indexName << std::endl;
|
||||
int idx = 0;
|
||||
for (const auto& res : result.m_results)
|
||||
{
|
||||
fprintf(stdout, "------------------\n");
|
||||
fprintf(stdout, "DocIndex: %d Distance: %f", res.VID, res.Dist);
|
||||
std::cout << "------------------" << std::endl;
|
||||
std::cout << "DocIndex: " << res.VID << " Distance: " << res.Dist;
|
||||
if (result.m_results.WithMeta())
|
||||
{
|
||||
const auto& metadata = result.m_results.GetMetadata(idx);
|
||||
fprintf(stdout, " MetaData: %.*s", static_cast<int>(metadata.Length()), metadata.Data());
|
||||
std::cout << " MetaData: " << std::string((char*)metadata.Data(), metadata.Length());
|
||||
}
|
||||
|
||||
fprintf(stdout, "\n");
|
||||
std::cout << std::endl;
|
||||
++idx;
|
||||
}
|
||||
}
|
||||
|
68
core/src/index/thirdparty/SPTAG/CMakeLists.txt
vendored
68
core/src/index/thirdparty/SPTAG/CMakeLists.txt
vendored
@ -19,12 +19,12 @@ if(NOT WIN32)
|
||||
endif()
|
||||
|
||||
if(${CMAKE_CXX_COMPILER_ID} STREQUAL "GNU")
|
||||
# require at least gcc 4.7
|
||||
if (CXX_COMPILER_VERSION VERSION_LESS 4.7)
|
||||
message(FATAL_ERROR "GCC version must be at least 4.7!")
|
||||
# require at least gcc 5.0
|
||||
if (CXX_COMPILER_VERSION VERSION_LESS 5.0)
|
||||
message(FATAL_ERROR "GCC version must be at least 5.0!")
|
||||
endif()
|
||||
set (CMAKE_CXX_FLAGS_RELEASE "-Wall -Wunreachable-code -Wno-reorder -Wno-sign-compare -Wno-unknown-pragmas -Wcast-align -lm -lrt -DNDEBUG -std=c++11 -fopenmp -march=native")
|
||||
set (CMAKE_CXX_FLAGS_DEBUG "-Wall -Wunreachable-code -Wno-reorder -Wno-sign-compare -Wno-unknown-pragmas -Wcast-align -ggdb -lm -lrt -DNDEBUG -std=c++11 -fopenmp -march=native")
|
||||
set (CMAKE_CXX_FLAGS_RELEASE "-Wall -Wunreachable-code -Wno-reorder -Wno-sign-compare -Wno-unknown-pragmas -Wcast-align -lm -lrt -DNDEBUG -std=c++14 -fopenmp -march=native")
|
||||
set (CMAKE_CXX_FLAGS_DEBUG "-Wall -Wunreachable-code -Wno-reorder -Wno-sign-compare -Wno-unknown-pragmas -Wcast-align -ggdb -lm -lrt -DNDEBUG -std=c++14 -fopenmp -march=native")
|
||||
elseif(WIN32)
|
||||
if(NOT MSVC14)
|
||||
message(FATAL_ERROR "On Windows, only MSVC version 14 are supported!")
|
||||
@ -74,54 +74,18 @@ else()
|
||||
message (FATAL_ERROR "Could no find openmp!")
|
||||
endif()
|
||||
|
||||
#find_package(Boost 1.67 COMPONENTS system thread serialization wserialization regex)
|
||||
#if (Boost_FOUND)
|
||||
# include_directories (${Boost_INCLUDE_DIR})
|
||||
# link_directories (${Boost_LIBRARY_DIR} "/usr/lib")
|
||||
# message (STATUS "Found Boost.")
|
||||
# message (STATUS "Include Path: ${Boost_INCLUDE_DIRS}")
|
||||
# message (STATUS "Library Path: ${Boost_LIBRARY_DIRS}")
|
||||
# message (STATUS "Library: ${Boost_LIBRARIES}")
|
||||
#else()
|
||||
# message (FATAL_ERROR "Could not find Boost 1.67!")
|
||||
#endif()
|
||||
|
||||
#set(Boost_LIBRARIES
|
||||
# boost_system_static
|
||||
# boost_filesystem_static
|
||||
# boost_serialization_static
|
||||
# boost_wserialization_static
|
||||
# boost_regex_static
|
||||
# boost_thread_static)
|
||||
set(TBB_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../tbb)
|
||||
|
||||
if (WIN32)
|
||||
if (${CMAKE_SIZEOF_VOID_P} EQUAL "8")
|
||||
set (TBB_LIBRARY_SUFFIX "lib/intel64/vc14")
|
||||
else()
|
||||
set (TBB_LIBRARY_SUFFIX "lib/ia32/vc14")
|
||||
endif()
|
||||
|
||||
find_path(TBB_INCLUDE_DIRS tbb/tbb.h HINTS ${TBB_DIR} "C:/Program Files/Intel/TBB" PATH_SUFFIXES include)
|
||||
find_library(TBB_LIBRARIES tbb${CMAKE_STATIC_LIBRARY_SUFFIX} HINTS ${TBB_DIR} "C:/Program Files/Intel/TBB" PATH_SUFFIXES ${TBB_LIBRARY_SUFFIX})
|
||||
find_package(Boost 1.67 COMPONENTS system thread serialization wserialization regex)
|
||||
if (Boost_FOUND)
|
||||
include_directories (${Boost_INCLUDE_DIR})
|
||||
link_directories (${Boost_LIBRARY_DIR} "/usr/lib")
|
||||
message (STATUS "Found Boost.")
|
||||
message (STATUS "Include Path: ${Boost_INCLUDE_DIRS}")
|
||||
message (STATUS "Library Path: ${Boost_LIBRARY_DIRS}")
|
||||
message (STATUS "Library: ${Boost_LIBRARIES}")
|
||||
else()
|
||||
find_path(TBB_INCLUDE_DIRS tbb/tbb.h HINTS ${TBB_DIR} "/usr/" PATH_SUFFIXES include)
|
||||
find_library(TBB_LIBRARIES libtbb${CMAKE_SHARED_LIBRARY_SUFFIX} HINTS ${TBB_DIR} "/usr/")
|
||||
endif()
|
||||
|
||||
set(TBB_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../tbb)
|
||||
find_path(TBB_INCLUDE_DIRS tbb/tbb.h HINTS ${TBB_DIR} "/usr/" PATH_SUFFIXES include)
|
||||
find_library(TBB_LIBRARIES libtbb${CMAKE_SHARED_LIBRARY_SUFFIX} HINTS ${TBB_DIR} "/usr/")
|
||||
|
||||
if (TBB_INCLUDE_DIRS AND TBB_LIBRARIES)
|
||||
include_directories (${TBB_INCLUDE_DIRS})
|
||||
message (STATUS "Found TBB.")
|
||||
message (STATUS "Include Path:" ${TBB_INCLUDE_DIRS})
|
||||
message (STATUS "Library:" ${TBB_LIBRARIES})
|
||||
else()
|
||||
message (FATAL_ERROR "Could not find TBB!")
|
||||
message (FATAL_ERROR "Could not find Boost 1.67!")
|
||||
endif()
|
||||
|
||||
add_subdirectory (AnnService)
|
||||
#add_subdirectory (Wrappers)
|
||||
#add_subdirectory (Test)
|
||||
add_subdirectory (Wrappers)
|
||||
add_subdirectory (Test)
|
||||
|
2
core/src/index/thirdparty/SPTAG/Dockerfile
vendored
2
core/src/index/thirdparty/SPTAG/Dockerfile
vendored
@ -6,7 +6,7 @@ COPY AnnService ./AnnService/
|
||||
COPY Test ./Test/
|
||||
COPY Wrappers ./Wrappers/
|
||||
|
||||
RUN apt-get update && apt-get -y install wget build-essential libtbb-dev \
|
||||
RUN apt-get update && apt-get -y install wget build-essential \
|
||||
# remove the following if you don't want to build the wrappers
|
||||
openjdk-8-jdk python3-pip swig
|
||||
|
||||
|
4
core/src/index/thirdparty/SPTAG/README.md
vendored
4
core/src/index/thirdparty/SPTAG/README.md
vendored
@ -43,7 +43,6 @@ The searches in the trees and the graph are iteratively conducted.
|
||||
* swig >= 3.0
|
||||
* cmake >= 3.12.0
|
||||
* boost >= 1.67.0
|
||||
* tbb >= 4.2
|
||||
|
||||
### **Install**
|
||||
|
||||
@ -66,7 +65,7 @@ Compiling the ALL_BUILD project in the Visual Studio (at least 2015) will genera
|
||||
```bash
|
||||
docker build -t sptag .
|
||||
```
|
||||
Will build a docker container with binaries in `/app/Release/`
|
||||
Will build a docker container with binaries in `/app/Release/`.
|
||||
|
||||
### **Verify**
|
||||
|
||||
@ -75,6 +74,7 @@ Run the test (or Test.exe) in the Release folder to verify all the tests have pa
|
||||
### **Usage**
|
||||
|
||||
The detailed usage can be found in [Get started](docs/GettingStart.md).
|
||||
The detailed parameter tuning guide can be found in [Parameters](docs/Parameters.md).
|
||||
|
||||
## **References**
|
||||
Please cite SPTAG in your publications if it helps your research:
|
||||
|
41
core/src/index/thirdparty/SPTAG/SPTAG.sln
vendored
41
core/src/index/thirdparty/SPTAG/SPTAG.sln
vendored
@ -1,4 +1,3 @@
|
||||
|
||||
Microsoft Visual Studio Solution File, Format Version 12.00
|
||||
# Visual Studio 14
|
||||
VisualStudioVersion = 14.0.25420.1
|
||||
@ -66,6 +65,22 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "JavaClient", "Wrappers\Java
|
||||
{C2BC5FDE-C853-4F3D-B7E4-2C9B5524DDF9} = {C2BC5FDE-C853-4F3D-B7E4-2C9B5524DDF9}
|
||||
EndProjectSection
|
||||
EndProject
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "CsharpCore", "Wrappers\CsharpCore.vcxproj", "{1896C009-AD46-4A70-B83C-4652A7F37503}"
|
||||
ProjectSection(ProjectDependencies) = postProject
|
||||
{C2BC5FDE-C853-4F3D-B7E4-2C9B5524DDF9} = {C2BC5FDE-C853-4F3D-B7E4-2C9B5524DDF9}
|
||||
EndProjectSection
|
||||
EndProject
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "CsharpClient", "Wrappers\CsharpClient.vcxproj", "{363BA3BB-75C4-4CC7-AECB-28C7534B3710}"
|
||||
ProjectSection(ProjectDependencies) = postProject
|
||||
{F9A72303-6381-4C80-86FF-606A2F6F7B96} = {F9A72303-6381-4C80-86FF-606A2F6F7B96}
|
||||
{C2BC5FDE-C853-4F3D-B7E4-2C9B5524DDF9} = {C2BC5FDE-C853-4F3D-B7E4-2C9B5524DDF9}
|
||||
EndProjectSection
|
||||
EndProject
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "CLRCore", "Wrappers\CLRCore.vcxproj", "{38ACBA6C-2E50-44D4-9A6D-DC735B56E38F}"
|
||||
ProjectSection(ProjectDependencies) = postProject
|
||||
{C2BC5FDE-C853-4F3D-B7E4-2C9B5524DDF9} = {C2BC5FDE-C853-4F3D-B7E4-2C9B5524DDF9}
|
||||
EndProjectSection
|
||||
EndProject
|
||||
Global
|
||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||
Debug|x64 = Debug|x64
|
||||
@ -162,6 +177,30 @@ Global
|
||||
{8866BF98-AA2E-450F-9F33-083E007CCA74}.Debug|x86.ActiveCfg = Debug|Win32
|
||||
{8866BF98-AA2E-450F-9F33-083E007CCA74}.Release|x64.ActiveCfg = Release|x64
|
||||
{8866BF98-AA2E-450F-9F33-083E007CCA74}.Release|x86.ActiveCfg = Release|Win32
|
||||
{1896C009-AD46-4A70-B83C-4652A7F37503}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{1896C009-AD46-4A70-B83C-4652A7F37503}.Debug|x64.Build.0 = Debug|x64
|
||||
{1896C009-AD46-4A70-B83C-4652A7F37503}.Debug|x86.ActiveCfg = Debug|Win32
|
||||
{1896C009-AD46-4A70-B83C-4652A7F37503}.Debug|x86.Build.0 = Debug|Win32
|
||||
{1896C009-AD46-4A70-B83C-4652A7F37503}.Release|x64.ActiveCfg = Release|x64
|
||||
{1896C009-AD46-4A70-B83C-4652A7F37503}.Release|x64.Build.0 = Release|x64
|
||||
{1896C009-AD46-4A70-B83C-4652A7F37503}.Release|x86.ActiveCfg = Release|Win32
|
||||
{1896C009-AD46-4A70-B83C-4652A7F37503}.Release|x86.Build.0 = Release|Win32
|
||||
{363BA3BB-75C4-4CC7-AECB-28C7534B3710}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{363BA3BB-75C4-4CC7-AECB-28C7534B3710}.Debug|x64.Build.0 = Debug|x64
|
||||
{363BA3BB-75C4-4CC7-AECB-28C7534B3710}.Debug|x86.ActiveCfg = Debug|Win32
|
||||
{363BA3BB-75C4-4CC7-AECB-28C7534B3710}.Debug|x86.Build.0 = Debug|Win32
|
||||
{363BA3BB-75C4-4CC7-AECB-28C7534B3710}.Release|x64.ActiveCfg = Release|x64
|
||||
{363BA3BB-75C4-4CC7-AECB-28C7534B3710}.Release|x64.Build.0 = Release|x64
|
||||
{363BA3BB-75C4-4CC7-AECB-28C7534B3710}.Release|x86.ActiveCfg = Release|Win32
|
||||
{363BA3BB-75C4-4CC7-AECB-28C7534B3710}.Release|x86.Build.0 = Release|Win32
|
||||
{38ACBA6C-2E50-44D4-9A6D-DC735B56E38F}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{38ACBA6C-2E50-44D4-9A6D-DC735B56E38F}.Debug|x64.Build.0 = Debug|x64
|
||||
{38ACBA6C-2E50-44D4-9A6D-DC735B56E38F}.Debug|x86.ActiveCfg = Debug|Win32
|
||||
{38ACBA6C-2E50-44D4-9A6D-DC735B56E38F}.Debug|x86.Build.0 = Debug|Win32
|
||||
{38ACBA6C-2E50-44D4-9A6D-DC735B56E38F}.Release|x64.ActiveCfg = Release|x64
|
||||
{38ACBA6C-2E50-44D4-9A6D-DC735B56E38F}.Release|x64.Build.0 = Release|x64
|
||||
{38ACBA6C-2E50-44D4-9A6D-DC735B56E38F}.Release|x86.ActiveCfg = Release|Win32
|
||||
{38ACBA6C-2E50-44D4-9A6D-DC735B56E38F}.Release|x86.Build.0 = Release|Win32
|
||||
EndGlobalSection
|
||||
GlobalSection(SolutionProperties) = preSolution
|
||||
HideSolutionNode = FALSE
|
||||
|
@ -23,7 +23,7 @@ include_directories(${PYTHON_INCLUDE_PATH} ${PROJECT_SOURCE_DIR}/AnnService ${PR
|
||||
file(GLOB TEST_HDR_FILES ${PROJECT_SOURCE_DIR}/Test/inc/Test.h)
|
||||
file(GLOB TEST_SRC_FILES ${PROJECT_SOURCE_DIR}/Test/src/*.cpp)
|
||||
add_executable (test ${TEST_SRC_FILES} ${TEST_HDR_FILES})
|
||||
target_link_libraries(test SPTAGLib ${Boost_LIBRARIES} ${TBB_LIBRARIES})
|
||||
target_link_libraries(test SPTAGLib ${Boost_LIBRARIES})
|
||||
|
||||
install(TARGETS test
|
||||
RUNTIME DESTINATION bin
|
||||
|
@ -166,8 +166,6 @@
|
||||
<Import Project="..\packages\boost_thread-vc140.1.67.0.0\build\boost_thread-vc140.targets" Condition="Exists('..\packages\boost_thread-vc140.1.67.0.0\build\boost_thread-vc140.targets')" />
|
||||
<Import Project="..\packages\boost_wserialization-vc140.1.67.0.0\build\boost_wserialization-vc140.targets" Condition="Exists('..\packages\boost_wserialization-vc140.1.67.0.0\build\boost_wserialization-vc140.targets')" />
|
||||
<Import Project="..\packages\boost_unit_test_framework-vc140.1.67.0.0\build\boost_unit_test_framework-vc140.targets" Condition="Exists('..\packages\boost_unit_test_framework-vc140.1.67.0.0\build\boost_unit_test_framework-vc140.targets')" />
|
||||
<Import Project="..\packages\tbb_oss.redist.9.107.0.0\build\native\tbb_oss.redist.targets" Condition="Exists('..\packages\tbb_oss.redist.9.107.0.0\build\native\tbb_oss.redist.targets')" />
|
||||
<Import Project="..\packages\tbb_oss.9.107.0.0\build\native\tbb_oss.targets" Condition="Exists('..\packages\tbb_oss.9.107.0.0\build\native\tbb_oss.targets')" />
|
||||
</ImportGroup>
|
||||
<Target Name="EnsureNuGetPackageBuildImports" BeforeTargets="PrepareForBuild">
|
||||
<PropertyGroup>
|
||||
@ -181,7 +179,5 @@
|
||||
<Error Condition="!Exists('..\packages\boost_thread-vc140.1.67.0.0\build\boost_thread-vc140.targets')" Text="$([System.String]::Format('$(ErrorText)', '..\packages\boost_thread-vc140.1.67.0.0\build\boost_thread-vc140.targets'))" />
|
||||
<Error Condition="!Exists('..\packages\boost_wserialization-vc140.1.67.0.0\build\boost_wserialization-vc140.targets')" Text="$([System.String]::Format('$(ErrorText)', '..\packages\boost_wserialization-vc140.1.67.0.0\build\boost_wserialization-vc140.targets'))" />
|
||||
<Error Condition="!Exists('..\packages\boost_unit_test_framework-vc140.1.67.0.0\build\boost_unit_test_framework-vc140.targets')" Text="$([System.String]::Format('$(ErrorText)', '..\packages\boost_unit_test_framework-vc140.1.67.0.0\build\boost_unit_test_framework-vc140.targets'))" />
|
||||
<Error Condition="!Exists('..\packages\tbb_oss.redist.9.107.0.0\build\native\tbb_oss.redist.targets')" Text="$([System.String]::Format('$(ErrorText)', '..\packages\tbb_oss.redist.9.107.0.0\build\native\tbb_oss.redist.targets'))" />
|
||||
<Error Condition="!Exists('..\packages\tbb_oss.9.107.0.0\build\native\tbb_oss.targets')" Text="$([System.String]::Format('$(ErrorText)', '..\packages\tbb_oss.9.107.0.0\build\native\tbb_oss.targets'))" />
|
||||
</Target>
|
||||
</Project>
|
@ -2,6 +2,4 @@
|
||||
<packages>
|
||||
<package id="boost" version="1.67.0.0" targetFramework="native" />
|
||||
<package id="boost_unit_test_framework-vc140" version="1.67.0.0" targetFramework="native" />
|
||||
<package id="tbb_oss" version="9.107.0.0" targetFramework="native" />
|
||||
<package id="tbb_oss.redist" version="9.107.0.0" targetFramework="native" />
|
||||
</packages>
|
@ -5,118 +5,143 @@
|
||||
#include "inc/Helper/SimpleIniReader.h"
|
||||
#include "inc/Core/VectorIndex.h"
|
||||
|
||||
#include <unordered_set>
|
||||
|
||||
template <typename T>
|
||||
void Build(SPTAG::IndexAlgoType algo, std::string distCalcMethod, T* vec, int n, int m)
|
||||
void Build(SPTAG::IndexAlgoType algo, std::string distCalcMethod, std::shared_ptr<SPTAG::VectorSet>& vec, std::shared_ptr<SPTAG::MetadataSet>& meta, const std::string out)
|
||||
{
|
||||
std::vector<char> meta;
|
||||
std::vector<long long> metaoffset;
|
||||
for (int i = 0; i < n; i++) {
|
||||
metaoffset.push_back(meta.size());
|
||||
std::string a = std::to_string(i);
|
||||
for (int j = 0; j < a.length(); j++)
|
||||
meta.push_back(a[j]);
|
||||
}
|
||||
metaoffset.push_back(meta.size());
|
||||
|
||||
std::shared_ptr<SPTAG::VectorSet> vecset(new SPTAG::BasicVectorSet(
|
||||
SPTAG::ByteArray((std::uint8_t*)vec, n * m * sizeof(T), false),
|
||||
SPTAG::GetEnumValueType<T>(), m, n));
|
||||
|
||||
std::shared_ptr<SPTAG::MetadataSet> metaset(new SPTAG::MemMetadataSet(
|
||||
SPTAG::ByteArray((std::uint8_t*)meta.data(), meta.size() * sizeof(char), false),
|
||||
SPTAG::ByteArray((std::uint8_t*)metaoffset.data(), metaoffset.size() * sizeof(long long), false),
|
||||
n));
|
||||
std::shared_ptr<SPTAG::VectorIndex> vecIndex = SPTAG::VectorIndex::CreateInstance(algo, SPTAG::GetEnumValueType<T>());
|
||||
vecIndex->SetParameter("DistCalcMethod", distCalcMethod);
|
||||
BOOST_CHECK(nullptr != vecIndex);
|
||||
BOOST_CHECK(SPTAG::ErrorCode::Success == vecIndex->BuildIndex(vecset, metaset));
|
||||
BOOST_CHECK(SPTAG::ErrorCode::Success == vecIndex->SaveIndex("origindices"));
|
||||
|
||||
vecIndex->SetParameter("DistCalcMethod", distCalcMethod);
|
||||
|
||||
BOOST_CHECK(SPTAG::ErrorCode::Success == vecIndex->BuildIndex(vec, meta));
|
||||
BOOST_CHECK(SPTAG::ErrorCode::Success == vecIndex->SaveIndex(out));
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void Search(std::string folder, T* vec, int k)
|
||||
void BuildWithMetaMapping(SPTAG::IndexAlgoType algo, std::string distCalcMethod, std::shared_ptr<SPTAG::VectorSet>& vec, std::shared_ptr<SPTAG::MetadataSet>& meta, const std::string out)
|
||||
{
|
||||
|
||||
std::shared_ptr<SPTAG::VectorIndex> vecIndex = SPTAG::VectorIndex::CreateInstance(algo, SPTAG::GetEnumValueType<T>());
|
||||
BOOST_CHECK(nullptr != vecIndex);
|
||||
|
||||
vecIndex->SetParameter("DistCalcMethod", distCalcMethod);
|
||||
|
||||
BOOST_CHECK(SPTAG::ErrorCode::Success == vecIndex->BuildIndex(vec, meta, true));
|
||||
BOOST_CHECK(SPTAG::ErrorCode::Success == vecIndex->SaveIndex(out));
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void Search(const std::string folder, T* vec, SPTAG::SizeType n, int k, std::string* truthmeta)
|
||||
{
|
||||
std::shared_ptr<SPTAG::VectorIndex> vecIndex;
|
||||
BOOST_CHECK(SPTAG::ErrorCode::Success == SPTAG::VectorIndex::LoadIndex(folder, vecIndex));
|
||||
BOOST_CHECK(nullptr != vecIndex);
|
||||
|
||||
SPTAG::QueryResult res(vec, k, true);
|
||||
vecIndex->SearchIndex(res);
|
||||
for (int i = 0; i < k; i++) {
|
||||
std::cout << res.GetResult(i)->Dist << "@(" << res.GetResult(i)->VID << "," << std::string((char*)res.GetMetadata(i).Data(), res.GetMetadata(i).Length()) << ") ";
|
||||
for (SPTAG::SizeType i = 0; i < n; i++)
|
||||
{
|
||||
SPTAG::QueryResult res(vec, k, true);
|
||||
vecIndex->SearchIndex(res);
|
||||
std::unordered_set<std::string> resmeta;
|
||||
for (int j = 0; j < k; j++)
|
||||
{
|
||||
resmeta.insert(std::string((char*)res.GetMetadata(j).Data(), res.GetMetadata(j).Length()));
|
||||
std::cout << res.GetResult(j)->Dist << "@(" << res.GetResult(j)->VID << "," << std::string((char*)res.GetMetadata(j).Data(), res.GetMetadata(j).Length()) << ") ";
|
||||
}
|
||||
std::cout << std::endl;
|
||||
for (int j = 0; j < k; j++)
|
||||
{
|
||||
BOOST_CHECK(resmeta.find(truthmeta[i * k + j]) != resmeta.end());
|
||||
}
|
||||
vec += vecIndex->GetFeatureDim();
|
||||
}
|
||||
std::cout << std::endl;
|
||||
vecIndex.reset();
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void Add(T* vec, int n)
|
||||
void Add(const std::string folder, std::shared_ptr<SPTAG::VectorSet>& vec, std::shared_ptr<SPTAG::MetadataSet>& meta, const std::string out)
|
||||
{
|
||||
std::shared_ptr<SPTAG::VectorIndex> vecIndex;
|
||||
BOOST_CHECK(SPTAG::ErrorCode::Success == SPTAG::VectorIndex::LoadIndex("origindices", vecIndex));
|
||||
BOOST_CHECK(SPTAG::ErrorCode::Success == SPTAG::VectorIndex::LoadIndex(folder, vecIndex));
|
||||
BOOST_CHECK(nullptr != vecIndex);
|
||||
|
||||
std::vector<char> meta;
|
||||
std::vector<long long> metaoffset;
|
||||
for (int i = 0; i < n; i++) {
|
||||
metaoffset.push_back(meta.size());
|
||||
std::string a = std::to_string(vecIndex->GetNumSamples() + i);
|
||||
for (int j = 0; j < a.length(); j++)
|
||||
meta.push_back(a[j]);
|
||||
}
|
||||
metaoffset.push_back(meta.size());
|
||||
|
||||
int m = vecIndex->GetFeatureDim();
|
||||
std::shared_ptr<SPTAG::VectorSet> vecset(new SPTAG::BasicVectorSet(
|
||||
SPTAG::ByteArray((std::uint8_t*)vec, n * m * sizeof(T), false),
|
||||
SPTAG::GetEnumValueType<T>(), m, n));
|
||||
|
||||
std::shared_ptr<SPTAG::MetadataSet> metaset(new SPTAG::MemMetadataSet(
|
||||
SPTAG::ByteArray((std::uint8_t*)meta.data(), meta.size() * sizeof(char), false),
|
||||
SPTAG::ByteArray((std::uint8_t*)metaoffset.data(), metaoffset.size() * sizeof(long long), false),
|
||||
n));
|
||||
|
||||
BOOST_CHECK(SPTAG::ErrorCode::Success == vecIndex->AddIndex(vecset, metaset));
|
||||
BOOST_CHECK(SPTAG::ErrorCode::Success == vecIndex->SaveIndex("addindices"));
|
||||
BOOST_CHECK(SPTAG::ErrorCode::Success == vecIndex->AddIndex(vec, meta));
|
||||
BOOST_CHECK(SPTAG::ErrorCode::Success == vecIndex->SaveIndex(out));
|
||||
vecIndex.reset();
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void Delete(T* vec, int n)
|
||||
void Delete(const std::string folder, T* vec, SPTAG::SizeType n, const std::string out)
|
||||
{
|
||||
std::shared_ptr<SPTAG::VectorIndex> vecIndex;
|
||||
BOOST_CHECK(SPTAG::ErrorCode::Success == SPTAG::VectorIndex::LoadIndex("addindices", vecIndex));
|
||||
BOOST_CHECK(SPTAG::ErrorCode::Success == SPTAG::VectorIndex::LoadIndex(folder, vecIndex));
|
||||
BOOST_CHECK(nullptr != vecIndex);
|
||||
|
||||
BOOST_CHECK(SPTAG::ErrorCode::Success == vecIndex->DeleteIndex((const void*)vec, n));
|
||||
BOOST_CHECK(SPTAG::ErrorCode::Success == vecIndex->SaveIndex("delindices"));
|
||||
BOOST_CHECK(SPTAG::ErrorCode::Success == vecIndex->SaveIndex(out));
|
||||
vecIndex.reset();
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void Test(SPTAG::IndexAlgoType algo, std::string distCalcMethod)
|
||||
{
|
||||
int n = 100, q = 3, m = 10, k = 3;
|
||||
SPTAG::SizeType n = 100, q = 3;
|
||||
SPTAG::DimensionType m = 10;
|
||||
int k = 3;
|
||||
std::vector<T> vec;
|
||||
for (int i = 0; i < n; i++) {
|
||||
for (int j = 0; j < m; j++) {
|
||||
for (SPTAG::SizeType i = 0; i < n; i++) {
|
||||
for (SPTAG::DimensionType j = 0; j < m; j++) {
|
||||
vec.push_back((T)i);
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<T> query;
|
||||
for (int i = 0; i < q; i++) {
|
||||
for (int j = 0; j < m; j++) {
|
||||
for (SPTAG::SizeType i = 0; i < q; i++) {
|
||||
for (SPTAG::DimensionType j = 0; j < m; j++) {
|
||||
query.push_back((T)i*2);
|
||||
}
|
||||
}
|
||||
|
||||
Build<T>(algo, distCalcMethod, vec.data(), n, m);
|
||||
Search<T>("origindices", query.data(), k);
|
||||
Add<T>(query.data(), q);
|
||||
Search<T>("addindices", query.data(), k);
|
||||
Delete<T>(query.data(), q);
|
||||
Search<T>("delindices", query.data(), k);
|
||||
std::vector<char> meta;
|
||||
std::vector<std::uint64_t> metaoffset;
|
||||
for (SPTAG::SizeType i = 0; i < n; i++) {
|
||||
metaoffset.push_back((std::uint64_t)meta.size());
|
||||
std::string a = std::to_string(i);
|
||||
for (size_t j = 0; j < a.length(); j++)
|
||||
meta.push_back(a[j]);
|
||||
}
|
||||
metaoffset.push_back((std::uint64_t)meta.size());
|
||||
|
||||
std::shared_ptr<SPTAG::VectorSet> vecset(new SPTAG::BasicVectorSet(
|
||||
SPTAG::ByteArray((std::uint8_t*)vec.data(), sizeof(T) * n * m, false),
|
||||
SPTAG::GetEnumValueType<T>(), m, n));
|
||||
|
||||
std::shared_ptr<SPTAG::MetadataSet> metaset(new SPTAG::MemMetadataSet(
|
||||
SPTAG::ByteArray((std::uint8_t*)meta.data(), meta.size() * sizeof(char), false),
|
||||
SPTAG::ByteArray((std::uint8_t*)metaoffset.data(), metaoffset.size() * sizeof(std::uint64_t), false),
|
||||
n));
|
||||
|
||||
Build<T>(algo, distCalcMethod, vecset, metaset, "testindices");
|
||||
std::string truthmeta1[] = { "0", "1", "2", "2", "1", "3", "4", "3", "5" };
|
||||
Search<T>("testindices", query.data(), q, k, truthmeta1);
|
||||
|
||||
Add<T>("testindices", vecset, metaset, "testindices");
|
||||
std::string truthmeta2[] = { "0", "0", "1", "2", "2", "1", "4", "4", "3" };
|
||||
Search<T>("testindices", query.data(), q, k, truthmeta2);
|
||||
|
||||
Delete<T>("testindices", query.data(), q, "testindices");
|
||||
std::string truthmeta3[] = { "1", "1", "3", "1", "3", "1", "3", "5", "3" };
|
||||
Search<T>("testindices", query.data(), q, k, truthmeta3);
|
||||
|
||||
BuildWithMetaMapping<T>(algo, distCalcMethod, vecset, metaset, "testindices");
|
||||
std::string truthmeta4[] = { "0", "1", "2", "2", "1", "3", "4", "3", "5" };
|
||||
Search<T>("testindices", query.data(), q, k, truthmeta4);
|
||||
|
||||
Add<T>("testindices", vecset, metaset, "testindices");
|
||||
std::string truthmeta5[] = { "0", "1", "2", "2", "1", "3", "4", "3", "5" };
|
||||
Search<T>("testindices", query.data(), q, k, truthmeta5);
|
||||
}
|
||||
|
||||
BOOST_AUTO_TEST_SUITE (AlgoTest)
|
||||
|
@ -6,7 +6,7 @@
|
||||
#include "inc/Core/Common/DistanceUtils.h"
|
||||
|
||||
template<typename T>
|
||||
static float ComputeCosineDistance(const T *pX, const T *pY, int length) {
|
||||
static float ComputeCosineDistance(const T *pX, const T *pY, SPTAG::DimensionType length) {
|
||||
float diff = 0;
|
||||
const T* pEnd1 = pX + length;
|
||||
while (pX < pEnd1) diff += (*pX++) * (*pY++);
|
||||
@ -14,7 +14,7 @@ static float ComputeCosineDistance(const T *pX, const T *pY, int length) {
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
static float ComputeL2Distance(const T *pX, const T *pY, int length)
|
||||
static float ComputeL2Distance(const T *pX, const T *pY, SPTAG::DimensionType length)
|
||||
{
|
||||
float diff = 0;
|
||||
const T* pEnd1 = pX + length;
|
||||
@ -32,10 +32,10 @@ T random(int high = RAND_MAX, int low = 0) // Generates a random value.
|
||||
|
||||
template<typename T>
|
||||
void test(int high) {
|
||||
int dimension = random<int>(256, 2);
|
||||
SPTAG::DimensionType dimension = random<SPTAG::DimensionType>(256, 2);
|
||||
T *X = new T[dimension], *Y = new T[dimension];
|
||||
BOOST_ASSERT(X != nullptr && Y != nullptr);
|
||||
for (int i = 0; i < dimension; i++) {
|
||||
for (SPTAG::DimensionType i = 0; i < dimension; i++) {
|
||||
X[i] = random<T>(high, -high);
|
||||
Y[i] = random<T>(high, -high);
|
||||
}
|
||||
|
@ -1,79 +0,0 @@
|
||||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
#include "inc/Test.h"
|
||||
#include "inc/Helper/SimpleIniReader.h"
|
||||
#include "inc/Core/VectorIndex.h"
|
||||
|
||||
|
||||
template<typename T>
|
||||
void Test(SPTAG::IndexAlgoType algo, std::string distCalcMethod) {
|
||||
int n = 100, q = 3, m = 10, k = 3;
|
||||
std::vector<T> vec;
|
||||
for (int i = 0; i < n; i++) {
|
||||
for (int j = 0; j < m; j++) {
|
||||
vec.push_back((T) i);
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<T> query;
|
||||
for (int i = 0; i < q; i++) {
|
||||
for (int j = 0; j < m; j++) {
|
||||
query.push_back((T) i * 2);
|
||||
}
|
||||
}
|
||||
|
||||
std::shared_ptr<SPTAG::VectorSet> vecset(new SPTAG::BasicVectorSet(
|
||||
SPTAG::ByteArray((std::uint8_t *) vec.data(), n * m * sizeof(T), false),
|
||||
SPTAG::GetEnumValueType<T>(), m, n));
|
||||
|
||||
std::vector<void *> blobs;
|
||||
std::vector<int64_t> len;
|
||||
{
|
||||
std::shared_ptr<SPTAG::VectorIndex> vecIndex =
|
||||
SPTAG::VectorIndex::CreateInstance(algo, SPTAG::GetEnumValueType<T>());
|
||||
vecIndex->SetParameter("DistCalcMethod", distCalcMethod);
|
||||
BOOST_CHECK(SPTAG::ErrorCode::Success == vecIndex->BuildIndex(vecset, nullptr));
|
||||
BOOST_CHECK(SPTAG::ErrorCode::Success == vecIndex->SaveIndexToMemory(blobs, len));
|
||||
}
|
||||
|
||||
std::vector<void *> clone_blobs;
|
||||
std::vector<int64_t> clone_len;
|
||||
for (auto i = 0; i < blobs.size(); ++i) {
|
||||
auto mem = malloc(len[i]);
|
||||
BOOST_CHECK(NULL != mem);
|
||||
memcpy(mem, blobs[i], len[i]);
|
||||
clone_blobs.push_back(mem);
|
||||
clone_len.push_back(len[i]);
|
||||
}
|
||||
|
||||
std::shared_ptr<SPTAG::VectorIndex> clone_index =
|
||||
SPTAG::VectorIndex::CreateInstance(algo, SPTAG::GetEnumValueType<T>());
|
||||
clone_index->SetParameter("DistCalcMethod", distCalcMethod);
|
||||
BOOST_CHECK(SPTAG::ErrorCode::Success == clone_index->LoadIndexFromMemory(clone_blobs));
|
||||
|
||||
SPTAG::QueryResult res(vec.data(), k, true);
|
||||
clone_index->SearchIndex(res);
|
||||
for (int i = 0; i < k; i++) {
|
||||
std::cout << res.GetResult(i)->Dist << "@(" << res.GetResult(i)->VID << ","
|
||||
<< std::string((char *) res.GetMetadata(i).Data(), res.GetMetadata(i).Length()) << ") ";
|
||||
}
|
||||
std::cout << std::endl;
|
||||
|
||||
for (auto &blob : blobs)
|
||||
free(blob);
|
||||
for (auto &blob : clone_blobs)
|
||||
free(blob);
|
||||
}
|
||||
|
||||
BOOST_AUTO_TEST_SUITE (SerializeTest)
|
||||
|
||||
BOOST_AUTO_TEST_CASE(KDTree) {
|
||||
Test<float>(SPTAG::IndexAlgoType::KDT, "L2");
|
||||
}
|
||||
|
||||
BOOST_AUTO_TEST_CASE(BKTree) {
|
||||
Test<float>(SPTAG::IndexAlgoType::BKT, "L2");
|
||||
}
|
||||
|
||||
BOOST_AUTO_TEST_SUITE_END()
|
141
core/src/index/thirdparty/SPTAG/Wrappers/CLRCore.vcxproj
vendored
Normal file
141
core/src/index/thirdparty/SPTAG/Wrappers/CLRCore.vcxproj
vendored
Normal file
@ -0,0 +1,141 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project DefaultTargets="Build" ToolsVersion="14.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<ItemGroup Label="ProjectConfigurations">
|
||||
<ProjectConfiguration Include="Debug|Win32">
|
||||
<Configuration>Debug</Configuration>
|
||||
<Platform>Win32</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Release|Win32">
|
||||
<Configuration>Release</Configuration>
|
||||
<Platform>Win32</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Debug|x64">
|
||||
<Configuration>Debug</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Release|x64">
|
||||
<Configuration>Release</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
</ItemGroup>
|
||||
<PropertyGroup Label="Globals">
|
||||
<VCProjectVersion>15.0</VCProjectVersion>
|
||||
<ProjectGuid>{38ACBA6C-2E50-44D4-9A6D-DC735B56E38F}</ProjectGuid>
|
||||
<TargetFrameworkVersion>v4.5.2</TargetFrameworkVersion>
|
||||
<Keyword>ManagedCProj</Keyword>
|
||||
<RootNamespace>CLRCore</RootNamespace>
|
||||
<WindowsTargetPlatformVersion>8.1</WindowsTargetPlatformVersion>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
|
||||
<Import Project="$(SolutionDir)\AnnService.users.props" />
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
|
||||
<ConfigurationType>DynamicLibrary</ConfigurationType>
|
||||
<UseDebugLibraries>true</UseDebugLibraries>
|
||||
<PlatformToolset>v140</PlatformToolset>
|
||||
<CLRSupport>true</CLRSupport>
|
||||
<CharacterSet>Unicode</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
|
||||
<ConfigurationType>DynamicLibrary</ConfigurationType>
|
||||
<UseDebugLibraries>false</UseDebugLibraries>
|
||||
<PlatformToolset>v140</PlatformToolset>
|
||||
<CLRSupport>true</CLRSupport>
|
||||
<CharacterSet>Unicode</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
|
||||
<ConfigurationType>DynamicLibrary</ConfigurationType>
|
||||
<UseDebugLibraries>true</UseDebugLibraries>
|
||||
<PlatformToolset>v140</PlatformToolset>
|
||||
<CLRSupport>true</CLRSupport>
|
||||
<CharacterSet>MultiByte</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
|
||||
<ConfigurationType>DynamicLibrary</ConfigurationType>
|
||||
<UseDebugLibraries>false</UseDebugLibraries>
|
||||
<PlatformToolset>v140</PlatformToolset>
|
||||
<CLRSupport>true</CLRSupport>
|
||||
<CharacterSet>MultiByte</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="Shared">
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
</ImportGroup>
|
||||
<PropertyGroup>
|
||||
<TargetName>Microsoft.ANN.SPTAGManaged</TargetName>
|
||||
<TargetExt>.dll</TargetExt>
|
||||
<IntDir>$(SolutionDir)obj\$(Platform)_$(Configuration)\$(ProjectName)\</IntDir>
|
||||
<IncludePath>$(ProjectDir);$(SolutionDir)AnnService\;$(IncludePath)</IncludePath>
|
||||
<LibraryPath>$(OutLibDir);$(LibraryPath)</LibraryPath>
|
||||
<OutDir>$(OutAppDir)</OutDir>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Label="UserMacros" />
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
<LinkIncremental>true</LinkIncremental>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
|
||||
<LinkIncremental>true</LinkIncremental>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||
<LinkIncremental>false</LinkIncremental>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
|
||||
<LinkIncremental>false</LinkIncremental>
|
||||
</PropertyGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
|
||||
<ClCompile>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<Optimization>Disabled</Optimization>
|
||||
<PreprocessorDefinitions>_DEBUG;_SCL_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<PrecompiledHeader>NotUsing</PrecompiledHeader>
|
||||
<OpenMPSupport>true</OpenMPSupport>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<AdditionalDependencies>CoreLibrary.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
|
||||
<ClCompile>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<PreprocessorDefinitions>NDEBUG;_SCL_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<PrecompiledHeader>NotUsing</PrecompiledHeader>
|
||||
<OpenMPSupport>true</OpenMPSupport>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<AdditionalDependencies>CoreLibrary.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemGroup>
|
||||
<Reference Include="System" />
|
||||
<Reference Include="System.Data" />
|
||||
<Reference Include="System.Xml" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClInclude Include="inc\CLRCoreInterface.h" />
|
||||
<ClInclude Include="inc\ManagedObject.h" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="src\AssemblyInfo.cpp" />
|
||||
<ClCompile Include="src\CLRCoreInterface.cpp" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ProjectReference Include="$(SolutionDir)AnnService\CoreLibrary.vcxproj">
|
||||
<Project>{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}</Project>
|
||||
</ProjectReference>
|
||||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
</ImportGroup>
|
||||
</Project>
|
32
core/src/index/thirdparty/SPTAG/Wrappers/CLRCore.vcxproj.filters
vendored
Normal file
32
core/src/index/thirdparty/SPTAG/Wrappers/CLRCore.vcxproj.filters
vendored
Normal file
@ -0,0 +1,32 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<ItemGroup>
|
||||
<Filter Include="Source Files">
|
||||
<UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
|
||||
<Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
|
||||
</Filter>
|
||||
<Filter Include="Header Files">
|
||||
<UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
|
||||
<Extensions>h;hh;hpp;hxx;hm;inl;inc;xsd</Extensions>
|
||||
</Filter>
|
||||
<Filter Include="Resources">
|
||||
<UniqueIdentifier>{ba4289c4-f872-4dbc-a57f-7b415614afb3}</UniqueIdentifier>
|
||||
</Filter>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClInclude Include="inc\CLRCoreInterface.h">
|
||||
<Filter>Header Files</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="inc\ManagedObject.h">
|
||||
<Filter>Header Files</Filter>
|
||||
</ClInclude>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="src\CLRCoreInterface.cpp">
|
||||
<Filter>Source Files</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="src\AssemblyInfo.cpp">
|
||||
<Filter>Source Files</Filter>
|
||||
</ClCompile>
|
||||
</ItemGroup>
|
||||
</Project>
|
@ -5,101 +5,167 @@ find_package(Python2 COMPONENTS Development)
|
||||
if (Python2_FOUND)
|
||||
include_directories (${Python2_INCLUDE_DIRS})
|
||||
link_directories (${Python2_LIBRARY_DIRS})
|
||||
set (Python_INCLUDE_DIRS ${Python2_INCLUDE_DIRS})
|
||||
set (Python_INCLUDE_DIRS ${Python2_INCLUDE_DIRS})
|
||||
set (Python_LIBRARIES ${Python2_LIBRARIES})
|
||||
set (Python_FOUND true)
|
||||
set (Python_FOUND true)
|
||||
else()
|
||||
find_package(Python3 COMPONENTS Development)
|
||||
if (Python3_FOUND)
|
||||
include_directories (${Python3_INCLUDE_DIRS})
|
||||
link_directories (${Python3_LIBRARY_DIRS})
|
||||
set (Python_INCLUDE_DIRS ${Python3_INCLUDE_DIRS})
|
||||
set (Python_INCLUDE_DIRS ${Python3_INCLUDE_DIRS})
|
||||
set (Python_LIBRARIES ${Python3_LIBRARIES})
|
||||
set (Python_FOUND true)
|
||||
set (Python_FOUND true)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if (Python_FOUND)
|
||||
message (STATUS "Found Python.")
|
||||
message (STATUS "Found Python.")
|
||||
message (STATUS "Include Path: ${Python_INCLUDE_DIRS}")
|
||||
message (STATUS "Library Path: ${Python_LIBRARIES}")
|
||||
|
||||
if (WIN32)
|
||||
set(PY_SUFFIX .pyd)
|
||||
else()
|
||||
set(PY_SUFFIX .so)
|
||||
endif()
|
||||
|
||||
execute_process(COMMAND swig -python -c++ -I${PROJECT_SOURCE_DIR}/Wrappers/inc -o ${PROJECT_SOURCE_DIR}/Wrappers/inc/CoreInterface_pwrap.cpp ${PROJECT_SOURCE_DIR}/Wrappers/inc/PythonCore.i)
|
||||
execute_process(COMMAND swig -python -c++ -I${PROJECT_SOURCE_DIR}/Wrappers/inc -o ${PROJECT_SOURCE_DIR}/Wrappers/inc/ClientInterface_pwrap.cpp ${PROJECT_SOURCE_DIR}/Wrappers/inc/PythonClient.i)
|
||||
if (WIN32)
|
||||
set(PY_SUFFIX .pyd)
|
||||
else()
|
||||
set(PY_SUFFIX .so)
|
||||
endif()
|
||||
|
||||
include_directories(${PYTHON_INCLUDE_PATH} ${PROJECT_SOURCE_DIR}/AnnService ${PROJECT_SOURCE_DIR}/Wrappers)
|
||||
execute_process(COMMAND swig -python -c++ -I${PROJECT_SOURCE_DIR}/Wrappers/inc -o ${PROJECT_SOURCE_DIR}/Wrappers/inc/CoreInterface_pwrap.cpp ${PROJECT_SOURCE_DIR}/Wrappers/inc/PythonCore.i)
|
||||
execute_process(COMMAND swig -python -c++ -I${PROJECT_SOURCE_DIR}/Wrappers/inc -o ${PROJECT_SOURCE_DIR}/Wrappers/inc/ClientInterface_pwrap.cpp ${PROJECT_SOURCE_DIR}/Wrappers/inc/PythonClient.i)
|
||||
|
||||
file(GLOB CORE_HDR_FILES ${PROJECT_SOURCE_DIR}/Wrappers/inc/CoreInterface.h)
|
||||
file(GLOB CORE_SRC_FILES ${PROJECT_SOURCE_DIR}/Wrappers/src/CoreInterface.cpp ${PROJECT_SOURCE_DIR}/Wrappers/inc/CoreInterface_pwrap.cpp)
|
||||
add_library (_SPTAG SHARED ${CORE_SRC_FILES} ${CORE_HDR_FILES})
|
||||
set_target_properties(_SPTAG PROPERTIES PREFIX "" SUFFIX ${PY_SUFFIX})
|
||||
target_link_libraries(_SPTAG SPTAGLib ${Python_LIBRARIES} ${TBB_LIBRARIES})
|
||||
add_custom_command(TARGET _SPTAG POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy ${PROJECT_SOURCE_DIR}/Wrappers/inc/SPTAG.py ${EXECUTABLE_OUTPUT_PATH})
|
||||
include_directories(${PYTHON_INCLUDE_PATH} ${PROJECT_SOURCE_DIR}/AnnService ${PROJECT_SOURCE_DIR}/Wrappers)
|
||||
|
||||
file(GLOB CLIENT_HDR_FILES ${PROJECT_SOURCE_DIR}/Wrappers/inc/ClientInterface.h ${PROJECT_SOURCE_DIR}/AnnService/inc/Socket/*.h ${PROJECT_SOURCE_DIR}/AnnService/inc/Client/*.h)
|
||||
file(GLOB CLIENT_SRC_FILES ${PROJECT_SOURCE_DIR}/Wrappers/src/ClientInterface.cpp ${PROJECT_SOURCE_DIR}/AnnService/src/Socket/*.cpp ${PROJECT_SOURCE_DIR}/AnnService/src/Client/*.cpp ${PROJECT_SOURCE_DIR}/Wrappers/inc/ClientInterface_pwrap.cpp)
|
||||
add_library (_SPTAGClient SHARED ${CLIENT_SRC_FILES} ${CLIENT_HDR_FILES})
|
||||
set_target_properties(_SPTAGClient PROPERTIES PREFIX "" SUFFIX ${PY_SUFFIX})
|
||||
target_link_libraries(_SPTAGClient SPTAGLib ${Python_LIBRARIES} ${Boost_LIBRARIES} ${TBB_LIBRARIES})
|
||||
add_custom_command(TARGET _SPTAGClient POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy ${PROJECT_SOURCE_DIR}/Wrappers/inc/SPTAGClient.py ${EXECUTABLE_OUTPUT_PATH})
|
||||
file(GLOB CORE_HDR_FILES ${PROJECT_SOURCE_DIR}/Wrappers/inc/CoreInterface.h)
|
||||
file(GLOB CORE_SRC_FILES ${PROJECT_SOURCE_DIR}/Wrappers/src/CoreInterface.cpp ${PROJECT_SOURCE_DIR}/Wrappers/inc/CoreInterface_pwrap.cpp)
|
||||
add_library (_SPTAG SHARED ${CORE_SRC_FILES} ${CORE_HDR_FILES})
|
||||
set_target_properties(_SPTAG PROPERTIES PREFIX "" SUFFIX ${PY_SUFFIX})
|
||||
target_link_libraries(_SPTAG SPTAGLib ${Python_LIBRARIES})
|
||||
add_custom_command(TARGET _SPTAG POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy ${PROJECT_SOURCE_DIR}/Wrappers/inc/SPTAG.py ${EXECUTABLE_OUTPUT_PATH})
|
||||
|
||||
install(TARGETS _SPTAG _SPTAGClient
|
||||
RUNTIME DESTINATION bin
|
||||
ARCHIVE DESTINATION lib
|
||||
LIBRARY DESTINATION lib)
|
||||
install(FILES ${PROJECT_SOURCE_DIR}/Wrappers/inc/SPTAG.py ${PROJECT_SOURCE_DIR}/Wrappers/inc/SPTAGClient.py DESTINATION bin)
|
||||
file(GLOB CLIENT_HDR_FILES ${PROJECT_SOURCE_DIR}/Wrappers/inc/ClientInterface.h ${PROJECT_SOURCE_DIR}/AnnService/inc/Socket/*.h ${PROJECT_SOURCE_DIR}/AnnService/inc/Client/*.h)
|
||||
file(GLOB CLIENT_SRC_FILES ${PROJECT_SOURCE_DIR}/Wrappers/src/ClientInterface.cpp ${PROJECT_SOURCE_DIR}/AnnService/src/Socket/*.cpp ${PROJECT_SOURCE_DIR}/AnnService/src/Client/*.cpp ${PROJECT_SOURCE_DIR}/Wrappers/inc/ClientInterface_pwrap.cpp)
|
||||
add_library (_SPTAGClient SHARED ${CLIENT_SRC_FILES} ${CLIENT_HDR_FILES})
|
||||
set_target_properties(_SPTAGClient PROPERTIES PREFIX "" SUFFIX ${PY_SUFFIX})
|
||||
target_link_libraries(_SPTAGClient SPTAGLib ${Python_LIBRARIES} ${Boost_LIBRARIES})
|
||||
add_custom_command(TARGET _SPTAGClient POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy ${PROJECT_SOURCE_DIR}/Wrappers/inc/SPTAGClient.py ${EXECUTABLE_OUTPUT_PATH})
|
||||
|
||||
install(TARGETS _SPTAG _SPTAGClient
|
||||
RUNTIME DESTINATION bin
|
||||
ARCHIVE DESTINATION lib
|
||||
LIBRARY DESTINATION lib)
|
||||
install(FILES ${PROJECT_SOURCE_DIR}/Wrappers/src/SPTAG.py ${PROJECT_SOURCE_DIR}/Wrappers/inc/SPTAGClient.py DESTINATION bin)
|
||||
else()
|
||||
message (STATUS "Could not find Python.")
|
||||
message (STATUS "Could not find Python.")
|
||||
endif()
|
||||
|
||||
find_package(JNI)
|
||||
if (!JNI_FOUND)
|
||||
if (JNI_FOUND)
|
||||
include_directories (${JNI_INCLUDE_DIRS})
|
||||
link_directories (${JNI_LIBRARY_DIRS})
|
||||
message (STATUS "Found JNI.")
|
||||
message (STATUS "Include Path: ${JNI_INCLUDE_DIRS}")
|
||||
message (STATUS "Library Path: ${JNI_LIBRARIES}")
|
||||
|
||||
if (WIN32)
|
||||
set (JAVA_SUFFIX .dll)
|
||||
else()
|
||||
set (JAVA_SUFFIX .so)
|
||||
endif()
|
||||
|
||||
execute_process(COMMAND swig -java -c++ -I${PROJECT_SOURCE_DIR}/Wrappers/inc -o ${PROJECT_SOURCE_DIR}/Wrappers/inc/CoreInterface_jwrap.cpp ${PROJECT_SOURCE_DIR}/Wrappers/inc/JavaCore.i)
|
||||
execute_process(COMMAND swig -java -c++ -I${PROJECT_SOURCE_DIR}/Wrappers/inc -o ${PROJECT_SOURCE_DIR}/Wrappers/inc/ClientInterface_jwrap.cpp ${PROJECT_SOURCE_DIR}/Wrappers/inc/JavaClient.i)
|
||||
|
||||
include_directories(${JNI_INCLUDE_DIRS} ${PROJECT_SOURCE_DIR}/AnnService ${PROJECT_SOURCE_DIR}/Wrappers)
|
||||
if (WIN32)
|
||||
set (JAVA_SUFFIX .dll)
|
||||
else()
|
||||
set (JAVA_SUFFIX .so)
|
||||
endif()
|
||||
|
||||
file(GLOB CORE_HDR_FILES ${PROJECT_SOURCE_DIR}/Wrappers/inc/CoreInterface.h)
|
||||
file(GLOB CORE_SRC_FILES ${PROJECT_SOURCE_DIR}/Wrappers/src/CoreInterface.cpp ${PROJECT_SOURCE_DIR}/Wrappers/inc/CoreInterface_jwrap.cpp)
|
||||
add_library (SPTAG SHARED ${CORE_SRC_FILES} ${CORE_HDR_FILES})
|
||||
set_target_properties(SPTAG PROPERTIES SUFFIX ${JAVA_SUFFIX})
|
||||
target_link_libraries(SPTAG SPTAGLib ${JNI_LIBRARIES} ${TBB_LIBRARIES})
|
||||
execute_process(COMMAND swig -java -c++ -I${PROJECT_SOURCE_DIR}/Wrappers/inc -o ${PROJECT_SOURCE_DIR}/Wrappers/inc/CoreInterface_jwrap.cpp ${PROJECT_SOURCE_DIR}/Wrappers/inc/JavaCore.i)
|
||||
execute_process(COMMAND swig -java -c++ -I${PROJECT_SOURCE_DIR}/Wrappers/inc -o ${PROJECT_SOURCE_DIR}/Wrappers/inc/ClientInterface_jwrap.cpp ${PROJECT_SOURCE_DIR}/Wrappers/inc/JavaClient.i)
|
||||
|
||||
file(GLOB CLIENT_HDR_FILES ${PROJECT_SOURCE_DIR}/Wrappers/inc/ClientInterface.h ${PROJECT_SOURCE_DIR}/AnnService/inc/Socket/*.h ${PROJECT_SOURCE_DIR}/AnnService/inc/Client/*.h)
|
||||
file(GLOB CLIENT_SRC_FILES ${PROJECT_SOURCE_DIR}/Wrappers/src/ClientInterface.cpp ${PROJECT_SOURCE_DIR}/AnnService/src/Socket/*.cpp ${PROJECT_SOURCE_DIR}/AnnService/src/Client/*.cpp ${PROJECT_SOURCE_DIR}/Wrappers/inc/ClientInterface_jwrap.cpp)
|
||||
add_library (SPTAGClient SHARED ${CLIENT_SRC_FILES} ${CLIENT_HDR_FILES})
|
||||
set_target_properties(SPTAGClient PROPERTIES SUFFIX ${JAVA_SUFFIX})
|
||||
target_link_libraries(SPTAGClient SPTAGLib ${JNI_LIBRARIES} ${Boost_LIBRARIES} ${TBB_LIBRARIES})
|
||||
|
||||
file(GLOB JAVA_FILES ${PROJECT_SOURCE_DIR}/Wrappers/inc/*.java)
|
||||
foreach(JAVA_FILE ${JAVA_FILES})
|
||||
message (STATUS "Add copy post-command for file " ${JAVA_FILE})
|
||||
add_custom_command(TARGET SPTAGClient POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy ${JAVA_FILE} ${EXECUTABLE_OUTPUT_PATH})
|
||||
endforeach(JAVA_FILE)
|
||||
|
||||
install(TARGETS SPTAG SPTAGClient
|
||||
RUNTIME DESTINATION bin
|
||||
ARCHIVE DESTINATION lib
|
||||
LIBRARY DESTINATION lib)
|
||||
install(FILES ${PROJECT_SOURCE_DIR}/Wrappers/inc/*.java DESTINATION bin)
|
||||
include_directories(${JNI_INCLUDE_DIRS} ${PROJECT_SOURCE_DIR}/AnnService ${PROJECT_SOURCE_DIR}/Wrappers)
|
||||
|
||||
file(GLOB CORE_HDR_FILES ${PROJECT_SOURCE_DIR}/Wrappers/inc/CoreInterface.h)
|
||||
file(GLOB CORE_SRC_FILES ${PROJECT_SOURCE_DIR}/Wrappers/src/CoreInterface.cpp ${PROJECT_SOURCE_DIR}/Wrappers/inc/CoreInterface_jwrap.cpp)
|
||||
add_library (JAVASPTAG SHARED ${CORE_SRC_FILES} ${CORE_HDR_FILES})
|
||||
set_target_properties(JAVASPTAG PROPERTIES SUFFIX ${JAVA_SUFFIX})
|
||||
target_link_libraries(JAVASPTAG SPTAGLib ${JNI_LIBRARIES})
|
||||
|
||||
file(GLOB CLIENT_HDR_FILES ${PROJECT_SOURCE_DIR}/Wrappers/inc/ClientInterface.h ${PROJECT_SOURCE_DIR}/AnnService/inc/Socket/*.h ${PROJECT_SOURCE_DIR}/AnnService/inc/Client/*.h)
|
||||
file(GLOB CLIENT_SRC_FILES ${PROJECT_SOURCE_DIR}/Wrappers/src/ClientInterface.cpp ${PROJECT_SOURCE_DIR}/AnnService/src/Socket/*.cpp ${PROJECT_SOURCE_DIR}/AnnService/src/Client/*.cpp ${PROJECT_SOURCE_DIR}/Wrappers/inc/ClientInterface_jwrap.cpp)
|
||||
add_library (JAVASPTAGClient SHARED ${CLIENT_SRC_FILES} ${CLIENT_HDR_FILES})
|
||||
set_target_properties(JAVASPTAGClient PROPERTIES SUFFIX ${JAVA_SUFFIX})
|
||||
target_link_libraries(JAVASPTAGClient SPTAGLib ${JNI_LIBRARIES} ${Boost_LIBRARIES})
|
||||
|
||||
file(GLOB JAVA_FILES ${PROJECT_SOURCE_DIR}/Wrappers/inc/*.java)
|
||||
foreach(JAVA_FILE ${JAVA_FILES})
|
||||
message (STATUS "Add copy post-command for file " ${JAVA_FILE})
|
||||
add_custom_command(TARGET JAVASPTAGClient POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy ${JAVA_FILE} ${EXECUTABLE_OUTPUT_PATH})
|
||||
endforeach(JAVA_FILE)
|
||||
|
||||
install(TARGETS JAVASPTAG JAVASPTAGClient
|
||||
RUNTIME DESTINATION bin
|
||||
ARCHIVE DESTINATION lib
|
||||
LIBRARY DESTINATION lib)
|
||||
install(FILES ${PROJECT_SOURCE_DIR}/Wrappers/inc/*.java DESTINATION bin)
|
||||
else()
|
||||
message (STATUS "Could not find JNI.")
|
||||
endif()
|
||||
message (STATUS "Could not find JNI.")
|
||||
endif()
|
||||
|
||||
if (WIN32)
|
||||
if (${PROJECTNAME_ARCHITECTURE} MATCHES "x64")
|
||||
set (csharp_dotnet_framework_hints "$ENV{windir}\\Microsoft.NET\\Framework64")
|
||||
else()
|
||||
set (csharp_dotnet_framework_hints "$ENV{windir}\\Microsoft.NET\\Framework")
|
||||
endif()
|
||||
|
||||
file(GLOB_RECURSE csharp_dotnet_executables ${csharp_dotnet_framework_hints}/csc.exe)
|
||||
list(SORT csharp_dotnet_executables)
|
||||
list(REVERSE csharp_dotnet_executables)
|
||||
foreach (csharp_dotnet_executable ${csharp_dotnet_executables})
|
||||
if (NOT DEFINED DOTNET_FOUND)
|
||||
string(REPLACE "${csharp_dotnet_framework_hints}/" "" csharp_dotnet_version_temp ${csharp_dotnet_executable})
|
||||
string(REPLACE "/csc.exe" "" csharp_dotnet_version_temp ${csharp_dotnet_version_temp})
|
||||
|
||||
set (DOTNET_EXECUTABLE_VERSION "${csharp_dotnet_version_temp}" CACHE STRING "C# .NET compiler version" FORCE)
|
||||
set (DOTNET_FOUND ${csharp_dotnet_executable})
|
||||
endif()
|
||||
endforeach(csharp_dotnet_executable)
|
||||
else()
|
||||
FIND_PROGRAM(DOTNET_FOUND dotnet)
|
||||
endif()
|
||||
|
||||
if (DOTNET_FOUND)
|
||||
message (STATUS "Found dotnet.")
|
||||
message (STATUS "DOTNET_EXECUTABLE: " ${DOTNET_FOUND})
|
||||
|
||||
if (WIN32)
|
||||
set (CSHARP_SUFFIX .dll)
|
||||
else()
|
||||
set (CSHARP_SUFFIX .so)
|
||||
endif()
|
||||
|
||||
execute_process(COMMAND swig -csharp -c++ -I${PROJECT_SOURCE_DIR}/Wrappers/inc -o ${PROJECT_SOURCE_DIR}/Wrappers/inc/CoreInterface_cwrap.cpp ${PROJECT_SOURCE_DIR}/Wrappers/inc/CsharpCore.i)
|
||||
execute_process(COMMAND swig -csharp -c++ -I${PROJECT_SOURCE_DIR}/Wrappers/inc -o ${PROJECT_SOURCE_DIR}/Wrappers/inc/ClientInterface_cwrap.cpp ${PROJECT_SOURCE_DIR}/Wrappers/inc/CsharpClient.i)
|
||||
|
||||
include_directories(${PROJECT_SOURCE_DIR}/AnnService ${PROJECT_SOURCE_DIR}/Wrappers)
|
||||
|
||||
file(GLOB CORE_HDR_FILES ${PROJECT_SOURCE_DIR}/Wrappers/inc/CoreInterface.h)
|
||||
file(GLOB CORE_SRC_FILES ${PROJECT_SOURCE_DIR}/Wrappers/src/CoreInterface.cpp ${PROJECT_SOURCE_DIR}/Wrappers/inc/CoreInterface_cwrap.cpp)
|
||||
add_library (CSHARPSPTAG SHARED ${CORE_SRC_FILES} ${CORE_HDR_FILES})
|
||||
set_target_properties(CSHARPSPTAG PROPERTIES SUFFIX ${CSHARP_SUFFIX})
|
||||
target_link_libraries(CSHARPSPTAG SPTAGLib)
|
||||
|
||||
file(GLOB CLIENT_HDR_FILES ${PROJECT_SOURCE_DIR}/Wrappers/inc/ClientInterface.h ${PROJECT_SOURCE_DIR}/AnnService/inc/Socket/*.h ${PROJECT_SOURCE_DIR}/AnnService/inc/Client/*.h)
|
||||
file(GLOB CLIENT_SRC_FILES ${PROJECT_SOURCE_DIR}/Wrappers/src/ClientInterface.cpp ${PROJECT_SOURCE_DIR}/AnnService/src/Socket/*.cpp ${PROJECT_SOURCE_DIR}/AnnService/src/Client/*.cpp ${PROJECT_SOURCE_DIR}/Wrappers/inc/ClientInterface_cwrap.cpp)
|
||||
add_library (CSHARPSPTAGClient SHARED ${CLIENT_SRC_FILES} ${CLIENT_HDR_FILES})
|
||||
set_target_properties(CSHARPSPTAGClient PROPERTIES SUFFIX ${CSHARP_SUFFIX})
|
||||
target_link_libraries(CSHARPSPTAGClient SPTAGLib ${Boost_LIBRARIES})
|
||||
|
||||
file(GLOB CSHARP_FILES ${PROJECT_SOURCE_DIR}/Wrappers/inc/*.cs)
|
||||
foreach(CSHARP_FILE ${CSHARP_FILES})
|
||||
message (STATUS "Add copy post-command for file " ${CSHARP_FILE})
|
||||
add_custom_command(TARGET CSHARPSPTAGClient POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy ${CSHARP_FILE} ${EXECUTABLE_OUTPUT_PATH})
|
||||
endforeach(CSHARP_FILE)
|
||||
|
||||
install(TARGETS CSHARPSPTAG CSHARPSPTAGClient
|
||||
RUNTIME DESTINATION bin
|
||||
ARCHIVE DESTINATION lib
|
||||
LIBRARY DESTINATION lib)
|
||||
install(FILES ${PROJECT_SOURCE_DIR}/Wrappers/inc/*.cs DESTINATION bin)
|
||||
else()
|
||||
message (STATUS "Could not find C#.")
|
||||
endif()
|
||||
|
||||
|
191
core/src/index/thirdparty/SPTAG/Wrappers/CsharpClient.vcxproj
vendored
Normal file
191
core/src/index/thirdparty/SPTAG/Wrappers/CsharpClient.vcxproj
vendored
Normal file
@ -0,0 +1,191 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<ItemGroup Label="ProjectConfigurations">
|
||||
<ProjectConfiguration Include="Debug|Win32">
|
||||
<Configuration>Debug</Configuration>
|
||||
<Platform>Win32</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Release|Win32">
|
||||
<Configuration>Release</Configuration>
|
||||
<Platform>Win32</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Debug|x64">
|
||||
<Configuration>Debug</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Release|x64">
|
||||
<Configuration>Release</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
</ItemGroup>
|
||||
<PropertyGroup Label="Globals">
|
||||
<VCProjectVersion>15.0</VCProjectVersion>
|
||||
<ProjectGuid>{363BA3BB-75C4-4CC7-AECB-28C7534B3710}</ProjectGuid>
|
||||
<RootNamespace>CsharpClient</RootNamespace>
|
||||
<WindowsTargetPlatformVersion>8.1</WindowsTargetPlatformVersion>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
|
||||
<Import Project="$(SolutionDir)\AnnService.users.props" />
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
|
||||
<ConfigurationType>DynamicLibrary</ConfigurationType>
|
||||
<UseDebugLibraries>true</UseDebugLibraries>
|
||||
<PlatformToolset>v140</PlatformToolset>
|
||||
<CharacterSet>MultiByte</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
|
||||
<ConfigurationType>DynamicLibrary</ConfigurationType>
|
||||
<UseDebugLibraries>false</UseDebugLibraries>
|
||||
<PlatformToolset>v140</PlatformToolset>
|
||||
<WholeProgramOptimization>true</WholeProgramOptimization>
|
||||
<CharacterSet>MultiByte</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
|
||||
<ConfigurationType>DynamicLibrary</ConfigurationType>
|
||||
<UseDebugLibraries>true</UseDebugLibraries>
|
||||
<PlatformToolset>v140</PlatformToolset>
|
||||
<CharacterSet>MultiByte</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
|
||||
<ConfigurationType>DynamicLibrary</ConfigurationType>
|
||||
<UseDebugLibraries>false</UseDebugLibraries>
|
||||
<PlatformToolset>v140</PlatformToolset>
|
||||
<WholeProgramOptimization>true</WholeProgramOptimization>
|
||||
<CharacterSet>MultiByte</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="Shared">
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
</ImportGroup>
|
||||
<PropertyGroup>
|
||||
<TargetName>CSHARPSPTAGClient</TargetName>
|
||||
<TargetExt>.dll</TargetExt>
|
||||
<IntDir>$(SolutionDir)obj\$(Platform)_$(Configuration)\$(ProjectName)\</IntDir>
|
||||
<IncludePath>$(ProjectDir);$(SolutionDir)AnnService\;$(IncludePath)</IncludePath>
|
||||
<LibraryPath>$(OutLibDir);$(LibraryPath)</LibraryPath>
|
||||
<OutDir>$(OutAppDir)</OutDir>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
|
||||
<LinkIncremental>false</LinkIncremental>
|
||||
</PropertyGroup>
|
||||
<ItemDefinitionGroup>
|
||||
<Link>
|
||||
<AdditionalDependencies>CoreLibrary.lib;SocketLib.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup>
|
||||
<ClCompile>
|
||||
<AdditionalIncludeDirectories>%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
||||
</ClCompile>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
|
||||
<ClCompile>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<Optimization>MaxSpeed</Optimization>
|
||||
<FunctionLevelLinking>true</FunctionLevelLinking>
|
||||
<IntrinsicFunctions>true</IntrinsicFunctions>
|
||||
<SDLCheck>true</SDLCheck>
|
||||
<ConformanceMode>true</ConformanceMode>
|
||||
<PreprocessorDefinitions>_WINDLL;_SCL_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<EnableCOMDATFolding>true</EnableCOMDATFolding>
|
||||
<OptimizeReferences>true</OptimizeReferences>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
<ClCompile>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<Optimization>Disabled</Optimization>
|
||||
<SDLCheck>true</SDLCheck>
|
||||
<ConformanceMode>true</ConformanceMode>
|
||||
</ClCompile>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
|
||||
<ClCompile>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<Optimization>Disabled</Optimization>
|
||||
<SDLCheck>true</SDLCheck>
|
||||
<ConformanceMode>true</ConformanceMode>
|
||||
<PreprocessorDefinitions>_WINDLL;_SCL_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<ControlFlowGuard>Guard</ControlFlowGuard>
|
||||
<DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<AdditionalOptions>/guard:cf %(AdditionalOptions)</AdditionalOptions>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||
<ClCompile>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<Optimization>MaxSpeed</Optimization>
|
||||
<FunctionLevelLinking>true</FunctionLevelLinking>
|
||||
<IntrinsicFunctions>true</IntrinsicFunctions>
|
||||
<SDLCheck>true</SDLCheck>
|
||||
<ConformanceMode>true</ConformanceMode>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<EnableCOMDATFolding>true</EnableCOMDATFolding>
|
||||
<OptimizeReferences>true</OptimizeReferences>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemGroup>
|
||||
<ClInclude Include="inc\ClientInterface.h" />
|
||||
<ClInclude Include="inc\TransferDataType.h" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="src\ClientInterface.cpp" />
|
||||
<ClCompile Include="$(IntDir)ClientInterface_cwrap.cpp" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<None Include="inc\CsharpClient.i">
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">false</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">false</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">false</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">false</ExcludedFromBuild>
|
||||
</None>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<None Include="inc\CsharpCommon.i" />
|
||||
<None Include="packages.config" />
|
||||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="..\packages\boost.1.67.0.0\build\boost.targets" Condition="Exists('..\packages\boost.1.67.0.0\build\boost.targets')" />
|
||||
<Import Project="..\packages\boost_system-vc140.1.67.0.0\build\boost_system-vc140.targets" Condition="Exists('..\packages\boost_system-vc140.1.67.0.0\build\boost_system-vc140.targets')" />
|
||||
<Import Project="..\packages\boost_date_time-vc140.1.67.0.0\build\boost_date_time-vc140.targets" Condition="Exists('..\packages\boost_date_time-vc140.1.67.0.0\build\boost_date_time-vc140.targets')" />
|
||||
<Import Project="..\packages\boost_regex-vc140.1.67.0.0\build\boost_regex-vc140.targets" Condition="Exists('..\packages\boost_regex-vc140.1.67.0.0\build\boost_regex-vc140.targets')" />
|
||||
</ImportGroup>
|
||||
<Target Name="BeforeBuild" BeforeTargets="PrepareForBuild">
|
||||
<MakeDir Directories="$(IntDir)" />
|
||||
<Exec Command="$(SolutionDir)packages\swigwin.3.0.9\tools\swigwin-3.0.9\swig.exe -csharp -c++ -I$(IntDir) -outdir $(IntDir) -o $(IntDir)/ClientInterface_cwrap.cpp inc\CsharpClient.i" />
|
||||
</Target>
|
||||
<ItemGroup>
|
||||
<MySourceFiles Include="$(IntDir)\*.cs" />
|
||||
</ItemGroup>
|
||||
<Target Name="CopyFiles" AfterTargets="BeforeBuild">
|
||||
<Copy SourceFiles="@(MySourceFiles)" DestinationFolder="$(Outdir)" />
|
||||
</Target>
|
||||
<Target Name="EnsureNuGetPackageBuildImports" BeforeTargets="PrepareForBuild">
|
||||
<PropertyGroup>
|
||||
<ErrorText>This project references NuGet package(s) that are missing on this computer. Use NuGet Package Restore to download them. For more information, see http://go.microsoft.com/fwlink/?LinkID=322105. The missing file is {0}.</ErrorText>
|
||||
</PropertyGroup>
|
||||
<Error Condition="!Exists('..\packages\swigwin.3.0.9\tools\swigwin-3.0.9\swig.exe')" Text="$([System.String]::Format('$(ErrorText)', '..\packages\swigwin.3.0.9\tools\swigwin-3.0.9\swig.exe'))" />
|
||||
<Error Condition="!Exists('..\packages\boost.1.67.0.0\build\boost.targets')" Text="$([System.String]::Format('$(ErrorText)', '..\packages\boost.1.67.0.0\build\boost.targets'))" />
|
||||
<Error Condition="!Exists('..\packages\boost_system-vc140.1.67.0.0\build\boost_system-vc140.targets')" Text="$([System.String]::Format('$(ErrorText)', '..\packages\boost_system-vc140.1.67.0.0\build\boost_system-vc140.targets'))" />
|
||||
<Error Condition="!Exists('..\packages\boost_date_time-vc140.1.67.0.0\build\boost_date_time-vc140.targets')" Text="$([System.String]::Format('$(ErrorText)', '..\packages\boost_date_time-vc140.1.67.0.0\build\boost_date_time-vc140.targets'))" />
|
||||
<Error Condition="!Exists('..\packages\boost_regex-vc140.1.67.0.0\build\boost_regex-vc140.targets')" Text="$([System.String]::Format('$(ErrorText)', '..\packages\boost_regex-vc140.1.67.0.0\build\boost_regex-vc140.targets'))" />
|
||||
</Target>
|
||||
</Project>
|
41
core/src/index/thirdparty/SPTAG/Wrappers/CsharpClient.vcxproj.filters
vendored
Normal file
41
core/src/index/thirdparty/SPTAG/Wrappers/CsharpClient.vcxproj.filters
vendored
Normal file
@ -0,0 +1,41 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<ItemGroup>
|
||||
<Filter Include="Source Files">
|
||||
<UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
|
||||
<Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
|
||||
</Filter>
|
||||
<Filter Include="Header Files">
|
||||
<UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
|
||||
<Extensions>h;hh;hpp;hxx;hm;inl;inc;xsd</Extensions>
|
||||
</Filter>
|
||||
<Filter Include="Resource Files">
|
||||
<UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
|
||||
<Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms</Extensions>
|
||||
</Filter>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClInclude Include="inc\ClientInterface.h">
|
||||
<Filter>Header Files</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="inc\TransferDataType.h">
|
||||
<Filter>Header Files</Filter>
|
||||
</ClInclude>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="src\ClientInterface.cpp">
|
||||
<Filter>Source Files</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="$(IntDir)ClientInterface_cwrap.cpp">
|
||||
<Filter>Source Files</Filter>
|
||||
</ClCompile>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<None Include="inc\CsharpClient.i">
|
||||
<Filter>Resource Files</Filter>
|
||||
</None>
|
||||
<None Include="inc\CsharpCommon.i">
|
||||
<Filter>Resource Files</Filter>
|
||||
</None>
|
||||
</ItemGroup>
|
||||
</Project>
|
134
core/src/index/thirdparty/SPTAG/Wrappers/CsharpCore.vcxproj
vendored
Normal file
134
core/src/index/thirdparty/SPTAG/Wrappers/CsharpCore.vcxproj
vendored
Normal file
@ -0,0 +1,134 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<ItemGroup Label="ProjectConfigurations">
|
||||
<ProjectConfiguration Include="Debug|Win32">
|
||||
<Configuration>Debug</Configuration>
|
||||
<Platform>Win32</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Release|Win32">
|
||||
<Configuration>Release</Configuration>
|
||||
<Platform>Win32</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Debug|x64">
|
||||
<Configuration>Debug</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Release|x64">
|
||||
<Configuration>Release</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
</ItemGroup>
|
||||
<PropertyGroup Label="Globals">
|
||||
<VCProjectVersion>15.0</VCProjectVersion>
|
||||
<ProjectGuid>{1896C009-AD46-4A70-B83C-4652A7F37503}</ProjectGuid>
|
||||
<RootNamespace>CsharpCore</RootNamespace>
|
||||
<WindowsTargetPlatformVersion>8.1</WindowsTargetPlatformVersion>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
|
||||
<Import Project="$(SolutionDir)\AnnService.users.props" />
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<UseDebugLibraries>true</UseDebugLibraries>
|
||||
<PlatformToolset>v140</PlatformToolset>
|
||||
<CharacterSet>MultiByte</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<UseDebugLibraries>false</UseDebugLibraries>
|
||||
<PlatformToolset>v140</PlatformToolset>
|
||||
<WholeProgramOptimization>true</WholeProgramOptimization>
|
||||
<CharacterSet>MultiByte</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
|
||||
<ConfigurationType>DynamicLibrary</ConfigurationType>
|
||||
<UseDebugLibraries>true</UseDebugLibraries>
|
||||
<PlatformToolset>v140</PlatformToolset>
|
||||
<CharacterSet>MultiByte</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
|
||||
<ConfigurationType>DynamicLibrary</ConfigurationType>
|
||||
<UseDebugLibraries>false</UseDebugLibraries>
|
||||
<PlatformToolset>v140</PlatformToolset>
|
||||
<WholeProgramOptimization>true</WholeProgramOptimization>
|
||||
<CharacterSet>MultiByte</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="Shared">
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
</ImportGroup>
|
||||
<PropertyGroup>
|
||||
<TargetName>CSHARPSPTAG</TargetName>
|
||||
<TargetExt>.dll</TargetExt>
|
||||
<IntDir>$(SolutionDir)obj\$(Platform)_$(Configuration)\$(ProjectName)\</IntDir>
|
||||
<IncludePath>$(ProjectDir);$(SolutionDir)AnnService\;$(IncludePath)</IncludePath>
|
||||
<LibraryPath>$(OutLibDir);$(LibraryPath)</LibraryPath>
|
||||
<OutDir>$(OutAppDir)</OutDir>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
|
||||
<LinkIncremental>false</LinkIncremental>
|
||||
</PropertyGroup>
|
||||
<ItemDefinitionGroup>
|
||||
<Link>
|
||||
<AdditionalDependencies>CoreLibrary.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup>
|
||||
<ClCompile>
|
||||
<PreprocessorDefinitions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">_WINDLL;_SCL_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<AdditionalIncludeDirectories>%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
||||
<ControlFlowGuard Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Guard</ControlFlowGuard>
|
||||
<DebugInformationFormat Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">ProgramDatabase</DebugInformationFormat>
|
||||
<PreprocessorDefinitions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">_WINDLL;_SCL_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">/guard:cf %(AdditionalOptions)</AdditionalOptions>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemGroup>
|
||||
<ClInclude Include="inc\CoreInterface.h" />
|
||||
<ClInclude Include="inc\TransferDataType.h" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="src\CoreInterface.cpp" />
|
||||
<ClCompile Include="$(IntDir)CoreInterface_cwrap.cpp" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<None Include="inc\CsharpCommon.i" />
|
||||
<None Include="inc\CsharpCore.i" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<None Include="packages.config" />
|
||||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
</ImportGroup>
|
||||
<Target Name="BeforeBuild" BeforeTargets="PrepareForBuild">
|
||||
<MakeDir Directories="$(IntDir)" />
|
||||
<Exec Command="$(SolutionDir)packages\swigwin.3.0.9\tools\swigwin-3.0.9\swig.exe -csharp -c++ -I$(IntDir) -outdir $(IntDir) -o $(IntDir)CoreInterface_cwrap.cpp inc\CsharpCore.i" />
|
||||
</Target>
|
||||
<ItemGroup>
|
||||
<MySourceFiles Include="$(IntDir)\*.cs" />
|
||||
</ItemGroup>
|
||||
<Target Name="CopyFiles" AfterTargets="BeforeBuild">
|
||||
<Copy SourceFiles="@(MySourceFiles)" DestinationFolder="$(Outdir)" />
|
||||
</Target>
|
||||
<Target Name="EnsureNuGetPackageBuildImports" BeforeTargets="PrepareForBuild">
|
||||
<PropertyGroup>
|
||||
<ErrorText>This project references NuGet package(s) that are missing on this computer. Use NuGet Package Restore to download them. For more information, see http://go.microsoft.com/fwlink/?LinkID=322105. The missing file is {0}.</ErrorText>
|
||||
</PropertyGroup>
|
||||
<Error Condition="!Exists('..\packages\swigwin.3.0.9\tools\swigwin-3.0.9\swig.exe')" Text="$([System.String]::Format('$(ErrorText)', '..\packages\swigwin.3.0.9\tools\swigwin-3.0.9\swig.exe'))" />
|
||||
</Target>
|
||||
</Project>
|
40
core/src/index/thirdparty/SPTAG/Wrappers/CsharpCore.vcxproj.filters
vendored
Normal file
40
core/src/index/thirdparty/SPTAG/Wrappers/CsharpCore.vcxproj.filters
vendored
Normal file
@ -0,0 +1,40 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<ItemGroup>
|
||||
<Filter Include="Source Files">
|
||||
<UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
|
||||
<Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
|
||||
</Filter>
|
||||
<Filter Include="Header Files">
|
||||
<UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
|
||||
<Extensions>h;hh;hpp;hxx;hm;inl;inc;xsd</Extensions>
|
||||
</Filter>
|
||||
<Filter Include="Resources">
|
||||
<UniqueIdentifier>{ba4289c4-f872-4dbc-a57f-7b415614afb3}</UniqueIdentifier>
|
||||
</Filter>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClInclude Include="inc\CoreInterface.h">
|
||||
<Filter>Header Files</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="inc\TransferDataType.h">
|
||||
<Filter>Header Files</Filter>
|
||||
</ClInclude>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="src\CoreInterface.cpp">
|
||||
<Filter>Source Files</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="$(IntDir)CoreInterface_cwrap.cpp">
|
||||
<Filter>Source Files</Filter>
|
||||
</ClCompile>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<None Include="inc\CsharpCore.i">
|
||||
<Filter>Resources</Filter>
|
||||
</None>
|
||||
<None Include="inc\CsharpCommon.i">
|
||||
<Filter>Resources</Filter>
|
||||
</None>
|
||||
</ItemGroup>
|
||||
</Project>
|
@ -70,7 +70,7 @@
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
</ImportGroup>
|
||||
<PropertyGroup>
|
||||
<TargetName>SPTAGClient</TargetName>
|
||||
<TargetName>JAVASPTAGClient</TargetName>
|
||||
<TargetExt>.dll</TargetExt>
|
||||
<IntDir>$(SolutionDir)obj\$(Platform)_$(Configuration)\$(ProjectName)\</IntDir>
|
||||
<IncludePath>$(ProjectDir);$(SolutionDir)AnnService\;$(IncludePath)</IncludePath>
|
||||
@ -158,6 +158,7 @@
|
||||
</None>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<None Include="inc\JavaCommon.i" />
|
||||
<None Include="packages.config" />
|
||||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
|
@ -34,5 +34,8 @@
|
||||
<None Include="inc\JavaClient.i">
|
||||
<Filter>Resource Files</Filter>
|
||||
</None>
|
||||
<None Include="inc\JavaCommon.i">
|
||||
<Filter>Resource Files</Filter>
|
||||
</None>
|
||||
</ItemGroup>
|
||||
</Project>
|
@ -70,7 +70,7 @@
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
</ImportGroup>
|
||||
<PropertyGroup>
|
||||
<TargetName>SPTAG</TargetName>
|
||||
<TargetName>JAVASPTAG</TargetName>
|
||||
<TargetExt>.dll</TargetExt>
|
||||
<IntDir>$(SolutionDir)obj\$(Platform)_$(Configuration)\$(ProjectName)\</IntDir>
|
||||
<IncludePath>$(ProjectDir);$(SolutionDir)AnnService\;$(IncludePath)</IncludePath>
|
||||
@ -106,6 +106,7 @@
|
||||
<ClCompile Include="$(IntDir)CoreInterface_jwrap.cpp" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<None Include="inc\JavaCommon.i" />
|
||||
<None Include="inc\JavaCore.i" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
@ -113,8 +114,6 @@
|
||||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="..\packages\tbb_oss.redist.9.107.0.0\build\native\tbb_oss.redist.targets" Condition="Exists('..\packages\tbb_oss.redist.9.107.0.0\build\native\tbb_oss.redist.targets')" />
|
||||
<Import Project="..\packages\tbb_oss.9.107.0.0\build\native\tbb_oss.targets" Condition="Exists('..\packages\tbb_oss.9.107.0.0\build\native\tbb_oss.targets')" />
|
||||
</ImportGroup>
|
||||
<Target Name="BeforeBuild" BeforeTargets="PrepareForBuild">
|
||||
<MakeDir Directories="$(IntDir)" />
|
||||
@ -131,7 +130,5 @@
|
||||
<ErrorText>This project references NuGet package(s) that are missing on this computer. Use NuGet Package Restore to download them. For more information, see http://go.microsoft.com/fwlink/?LinkID=322105. The missing file is {0}.</ErrorText>
|
||||
</PropertyGroup>
|
||||
<Error Condition="!Exists('..\packages\swigwin.3.0.9\tools\swigwin-3.0.9\swig.exe')" Text="$([System.String]::Format('$(ErrorText)', '..\packages\swigwin.3.0.9\tools\swigwin-3.0.9\swig.exe'))" />
|
||||
<Error Condition="!Exists('..\packages\tbb_oss.redist.9.107.0.0\build\native\tbb_oss.redist.targets')" Text="$([System.String]::Format('$(ErrorText)', '..\packages\tbb_oss.redist.9.107.0.0\build\native\tbb_oss.redist.targets'))" />
|
||||
<Error Condition="!Exists('..\packages\tbb_oss.9.107.0.0\build\native\tbb_oss.targets')" Text="$([System.String]::Format('$(ErrorText)', '..\packages\tbb_oss.9.107.0.0\build\native\tbb_oss.targets'))" />
|
||||
</Target>
|
||||
</Project>
|
@ -30,9 +30,11 @@
|
||||
</ClCompile>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<None Include="packages.config" />
|
||||
<None Include="inc\JavaCore.i">
|
||||
<Filter>Resources</Filter>
|
||||
</None>
|
||||
<None Include="inc\JavaCommon.i">
|
||||
<Filter>Resources</Filter>
|
||||
</None>
|
||||
</ItemGroup>
|
||||
</Project>
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user