fix conflict

Former-commit-id: 9b741b6360e0704ec875766c7ef3448d0eba4212
This commit is contained in:
Yu Kun 2019-10-15 16:31:30 +08:00
commit 975403286e
135 changed files with 4171 additions and 2704 deletions

View File

@ -10,7 +10,14 @@ container('milvus-build-env') {
sh "git config --global user.name \"test\""
withCredentials([usernamePassword(credentialsId: "${params.JFROG_USER}", usernameVariable: 'USERNAME', passwordVariable: 'PASSWORD')]) {
sh "./build.sh -l"
sh "export JFROG_ARTFACTORY_URL='${params.JFROG_ARTFACTORY_URL}' && export JFROG_USER_NAME='${USERNAME}' && export JFROG_PASSWORD='${PASSWORD}' && ./build.sh -t ${params.BUILD_TYPE} -j -u -c"
sh "rm -rf cmake_build"
sh "export JFROG_ARTFACTORY_URL='${params.JFROG_ARTFACTORY_URL}' \
&& export JFROG_USER_NAME='${USERNAME}' \
&& export JFROG_PASSWORD='${PASSWORD}' \
&& export FAISS_URL='http://192.168.1.105:6060/jinhai/faiss/-/archive/branch-0.2.1/faiss-branch-0.2.1.tar.gz' \
&& ./build.sh -t ${params.BUILD_TYPE} -j -u -c"
sh "./coverage.sh -u root -p Fantast1c -t 192.168.1.194"
}
}
} catch (exc) {

View File

@ -11,6 +11,7 @@ container('milvus-build-env') {
withCredentials([usernamePassword(credentialsId: "${params.JFROG_USER}", usernameVariable: 'USERNAME', passwordVariable: 'PASSWORD')]) {
sh "./build.sh -l"
sh "export JFROG_ARTFACTORY_URL='${params.JFROG_ARTFACTORY_URL}' && export JFROG_USER_NAME='${USERNAME}' && export JFROG_PASSWORD='${PASSWORD}' && ./build.sh -t ${params.BUILD_TYPE} -j"
sh "./coverage.sh -u root -p Fantast1c -t 192.168.1.194"
}
}
} catch (exc) {

View File

@ -11,17 +11,21 @@ Please mark all change in change log and use the ticket from JIRA.
- MS-587 - Count get wrong result after adding vectors and index built immediately
- MS-599 - Search wrong result when table created with metric_type: IP
- MS-601 - Docker logs error caused by get CPUTemperature error
- MS-622 - Delete vectors should be failed if date range is invalid
- MS-605 - Server going down during searching vectors
- MS-620 - Get table row counts display wrong error code
- MS-622 - Delete vectors should be failed if date range is invalid
- MS-624 - Search vectors failed if time ranges long enough
- MS-637 - Out of memory when load too many tasks
- MS-639 - SQ8H index created failed and server hang
- MS-640 - Cache object size calculate incorrect
- MS-641 - Segment fault(signal 11) in PickToLoad
- MS-639 - SQ8H index created failed and server hang
- MS-647 - [monitor] grafana display average cpu-temp
- MS-644 - Search crashed with index-type: flat
- MS-624 - Search vectors failed if time ranges long enough
- MS-647 - grafana display average cpu-temp
- MS-652 - IVFSQH quantization double free
- MS-650 - SQ8H index create issue
- MS-653 - When config check fail, Milvus close without message
- MS-654 - Describe index timeout when building index
- MS-658 - Fix SQ8 Hybrid can't search
## Improvement
- MS-552 - Add and change the easylogging library
@ -39,10 +43,11 @@ Please mark all change in change log and use the ticket from JIRA.
- MS-608 - Update TODO names
- MS-609 - Update task construct function
- MS-611 - Add resources validity check in ResourceMgr
- MS-619 - Add optimizer class in scheduler
- MS-614 - Preload table at startup
- MS-619 - Add optimizer class in scheduler
- MS-626 - Refactor DataObj to support cache any type data
- MS-648 - Improve unittest
- MS-655 - Upgrade SPTAG
## New Feature
- MS-627 - Integrate new index: IVFSQHybrid
@ -60,8 +65,8 @@ Please mark all change in change log and use the ticket from JIRA.
- MS-600 - Reconstruct unittest code
- MS-602 - Remove zilliz namespace
- MS-610 - Change error code base value from hex to decimal
- MS-635 - Add compile option to support customized faiss
- MS-624 - Re-organize project directory for open-source
- MS-635 - Add compile option to support customized faiss
# Milvus 0.4.0 (2019-09-12)

View File

@ -125,10 +125,6 @@ set(MILVUS_SOURCE_DIR ${PROJECT_SOURCE_DIR})
set(MILVUS_BINARY_DIR ${PROJECT_BINARY_DIR})
set(MILVUS_ENGINE_SRC ${PROJECT_SOURCE_DIR}/src)
if (CUSTOMIZATION)
add_definitions(-DCUSTOMIZATION)
endif (CUSTOMIZATION)
include(ExternalProject)
include(DefineOptions)
include(BuildUtils)
@ -136,6 +132,10 @@ include(ThirdPartyPackages)
config_summary()
if (CUSTOMIZATION)
add_definitions(-DCUSTOMIZATION)
endif (CUSTOMIZATION)
add_subdirectory(src)
if (BUILD_UNIT_TEST STREQUAL "ON")

View File

@ -76,13 +76,13 @@ $ sudo ln -s /path/to/libmysqlclient.so /path/to/libmysqlclient_r.so
###### Step 2 Build
```shell
$ cd [Milvus sourcecode path]/cpp
$ cd [Milvus sourcecode path]/core
$ ./build.sh -t Debug
or
$ ./build.sh -t Release
```
When the build is completed, all the stuff that you need in order to run Milvus will be installed under `[Milvus root path]/cpp/milvus`.
When the build completes, everything you need to run Milvus is installed under `[Milvus root path]/core/milvus`.
If you encounter the following error message,
`protocol https not supported or disabled in libcurl`
@ -148,11 +148,20 @@ $ sudo apt-get install lcov
```shell
$ ./build.sh -u -c
```
Run MySQL in a Docker container
```shell
docker pull mysql:latest
docker run -p 3306:3306 -e MYSQL_ROOT_PASSWORD=123456 -d mysql:latest
```
Run code coverage
```shell
$ ./coverage.sh -u root -p 123456 -t 127.0.0.1
```
##### Launch Milvus server
```shell
$ cd [Milvus root path]/cpp/milvus
$ cd [Milvus root path]/core/milvus
```
Add `lib/` directory to `LD_LIBRARY_PATH`
@ -202,7 +211,7 @@ $ python3 example.py
```shell
# Run Milvus C++ example
$ cd [Milvus root path]/cpp/milvus/bin
$ cd [Milvus root path]/core/milvus/bin
$ ./sdk_simple
```

View File

@ -9,10 +9,16 @@ DB_PATH="/opt/milvus"
PROFILING="OFF"
USE_JFROG_CACHE="OFF"
RUN_CPPLINT="OFF"
CUSTOMIZATION="ON"
CUSTOMIZATION="OFF" # default use ori faiss
CUDA_COMPILER=/usr/local/cuda/bin/nvcc
wget -q --method HEAD
CUSTOMIZED_FAISS_URL="${FAISS_URL:-NONE}"
wget -q --method HEAD ${CUSTOMIZED_FAISS_URL}
if [ $? -eq 0 ]; then
CUSTOMIZATION="ON"
else
CUSTOMIZATION="OFF"
fi
while getopts "p:d:t:ulrcgjhx" arg
do
@ -49,7 +55,7 @@ do
USE_JFROG_CACHE="ON"
;;
x)
CUSTOMIZATION="OFF"
CUSTOMIZATION="OFF" # force use ori faiss
;;
h) # help
echo "
@ -94,6 +100,7 @@ CMAKE_CMD="cmake \
-DMILVUS_ENABLE_PROFILING=${PROFILING} \
-DUSE_JFROG_CACHE=${USE_JFROG_CACHE} \
-DCUSTOMIZATION=${CUSTOMIZATION} \
-DFAISS_URL=${CUSTOMIZED_FAISS_URL} \
../"
echo ${CMAKE_CMD}
${CMAKE_CMD}
@ -136,11 +143,4 @@ else
fi
make install || exit 1
# evaluate code coverage
if [[ ${BUILD_COVERAGE} == "ON" ]]; then
cd -
bash `pwd`/coverage.sh
cd -
fi
fi

View File

@ -88,6 +88,11 @@ function(ExternalProject_Create_Cache project_name package_file install_path cac
file(REMOVE ${package_file})
endif()
string(REGEX REPLACE "(.+)/.+$" "\\1" package_dir ${package_file})
if(NOT EXISTS ${package_dir})
file(MAKE_DIRECTORY ${package_dir})
endif()
message(STATUS "Will create cached package file: ${package_file}")
ExternalProject_Add_Step(${project_name} package

View File

@ -2,6 +2,44 @@
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$(pwd)/milvus/lib
MYSQL_USER_NAME=root
MYSQL_PASSWORD=123456
MYSQL_HOST='127.0.0.1'
MYSQL_PORT='3306'
# Parse the command-line options of coverage.sh.
#   -u <value>  stored in MYSQL_USER_NAME
#   -p <value>  stored in MYSQL_PASSWORD
#   -t <value>  stored in MYSQL_HOST
#   -h          print the usage text and exit with status 0
# Any other option prints an error and exits with status 1.
while getopts "u:p:t:h" arg; do
    case "${arg}" in
        h)
            # Usage text; the escaped \${...} are printed literally as placeholders.
            echo "
parameter:
-u: mysql account
-p: mysql password
-t: mysql host
-h: help
usage:
./coverage.sh -u \${MYSQL_USER} -p \${MYSQL_PASSWORD} -t \${MYSQL_HOST} [-h]
"
            exit 0
            ;;
        u) MYSQL_USER_NAME="${OPTARG}" ;;
        p) MYSQL_PASSWORD="${OPTARG}" ;;
        t) MYSQL_HOST="${OPTARG}" ;;
        \?)
            # getopts reports an unrecognised option (or a missing argument) as '?'.
            echo "ERROR! unknown argument"
            exit 1
            ;;
    esac
done
LCOV_CMD="lcov"
LCOV_GEN_CMD="genhtml"
@ -15,34 +53,12 @@ DIR_GCNO="cmake_build"
DIR_UNITTEST="milvus/unittest"
# delete old code coverage info files
rm -f FILE_INFO_BASE
rm -f FILE_INFO_MILVUS
rm -f FILE_INFO_OUTPUT
rm -f FILE_INFO_OUTPUT_NEW
rm -rf lcov_out
rm -f FILE_INFO_BASE FILE_INFO_MILVUS FILE_INFO_OUTPUT FILE_INFO_OUTPUT_NEW
MYSQL_USER_NAME=root
MYSQL_PASSWORD=Fantast1c
MYSQL_HOST='192.168.1.194'
MYSQL_PORT='3306'
MYSQL_DB_NAME=milvus_`date +%s%N`
# Run one SQL statement through the mysql command-line client.
#
# Arguments:
#   $1 - SQL text passed to `mysql -e`.
# Globals read:
#   MYSQL_HOST, MYSQL_USER_NAME, MYSQL_PASSWORD
# Output:
#   On failure prints "mysql <statement> run failed" to stdout.
# Returns:
#   The exit status of the mysql client, so callers can detect a failed
#   statement (0 on success).
#
# NOTE(review): the password is still passed on the command line via -p, where
# it is visible in the process list; consider an option file if that matters.
function mysql_exc()
{
    local cmd=$1
    local rc=0

    # Every expansion is quoted so that values containing spaces or glob
    # characters reach mysql as a single argument each.
    mysql -h"${MYSQL_HOST}" -u"${MYSQL_USER_NAME}" -p"${MYSQL_PASSWORD}" -e "${cmd}" || rc=$?

    if [ "${rc}" -ne 0 ]; then
        echo "mysql $cmd run failed"
    fi
    return "${rc}"
}
mysql_exc "CREATE DATABASE IF NOT EXISTS ${MYSQL_DB_NAME};"
mysql_exc "GRANT ALL PRIVILEGES ON ${MYSQL_DB_NAME}.* TO '${MYSQL_USER_NAME}'@'%';"
mysql_exc "FLUSH PRIVILEGES;"
mysql_exc "USE ${MYSQL_DB_NAME};"
MYSQL_USER_NAME=root
MYSQL_PASSWORD=Fantast1c
MYSQL_HOST='192.168.1.194'
MYSQL_PORT='3306'
MYSQL_DB_NAME=milvus_`date +%s%N`

View File

@ -26,6 +26,7 @@ include_directories(${MILVUS_ENGINE_SRC}/grpc/gen-milvus)
# This statement must be placed here, since CORE_INCLUDE_DIRS is defined in code/CMakeList.txt
add_subdirectory(index)
set(CORE_INCLUDE_DIRS ${CORE_INCLUDE_DIRS} PARENT_SCOPE)
foreach (dir ${CORE_INCLUDE_DIRS})
include_directories(${dir})
@ -182,8 +183,6 @@ target_link_libraries(milvus_server
install(TARGETS milvus_server DESTINATION bin)
install(FILES
${CMAKE_SOURCE_DIR}/src/index/thirdparty/tbb/${CMAKE_SHARED_LIBRARY_PREFIX}tbb${CMAKE_SHARED_LIBRARY_SUFFIX}
${CMAKE_SOURCE_DIR}/src/index/thirdparty/tbb/${CMAKE_SHARED_LIBRARY_PREFIX}tbb${CMAKE_SHARED_LIBRARY_SUFFIX}.2
${CMAKE_BINARY_DIR}/mysqlpp_ep-prefix/src/mysqlpp_ep/lib/${CMAKE_SHARED_LIBRARY_PREFIX}mysqlpp${CMAKE_SHARED_LIBRARY_SUFFIX}
${CMAKE_BINARY_DIR}/mysqlpp_ep-prefix/src/mysqlpp_ep/lib/${CMAKE_SHARED_LIBRARY_PREFIX}mysqlpp${CMAKE_SHARED_LIBRARY_SUFFIX}.3
${CMAKE_BINARY_DIR}/mysqlpp_ep-prefix/src/mysqlpp_ep/lib/${CMAKE_SHARED_LIBRARY_PREFIX}mysqlpp${CMAKE_SHARED_LIBRARY_SUFFIX}.3.2.4

View File

@ -1,31 +0,0 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include "config/ConfigMgr.h"
#include "YamlConfigMgr.h"
namespace milvus {
namespace server {
// Returns the shared configuration manager.
// The object is a function-local static YamlConfigMgr: it is constructed on the first
// call, and every call hands back the address of that same object as a ConfigMgr*.
ConfigMgr*
ConfigMgr::GetInstance() {
    static YamlConfigMgr yaml_config_mgr;
    ConfigMgr* instance = &yaml_config_mgr;
    return instance;
}
} // namespace server
} // namespace milvus

View File

@ -17,42 +17,28 @@
#pragma once
#include "ConfigNode.h"
#include "utils/Error.h"
#include <string>
#include "ConfigNode.h"
#include "utils/Status.h"
namespace milvus {
namespace server {
// this class can parse nested config file and return config item
// config file example(yaml style)
// AAA: 1
// BBB:
// CCC: hello
// DDD: 23.5
//
// usage
// const ConfigMgr* mgr = ConfigMgr::GetInstance();
// const ConfigNode& node = mgr->GetRootNode();
// std::string val = node.GetValue("AAA"); // return '1'
// const ConfigNode& child = node.GetChild("BBB");
// val = child.GetValue("CCC"); //return 'hello'
class ConfigMgr {
public:
static ConfigMgr*
GetInstance();
virtual ErrorCode
virtual Status
LoadConfigFile(const std::string& filename) = 0;
virtual void
Print() const = 0; // will be deleted
virtual std::string
DumpString() const = 0;
virtual const ConfigNode&
GetRootNode() const = 0;
virtual ConfigNode&
GetRootNode() = 0;
};

View File

@ -18,29 +18,20 @@
#include "config/YamlConfigMgr.h"
#include "utils/Log.h"
#include <sys/stat.h>
namespace milvus {
namespace server {
ErrorCode
Status
YamlConfigMgr::LoadConfigFile(const std::string& filename) {
struct stat directoryStat;
int statOK = stat(filename.c_str(), &directoryStat);
if (statOK != 0) {
SERVER_LOG_ERROR << "File not found: " << filename;
return SERVER_UNEXPECTED_ERROR;
}
try {
node_ = YAML::LoadFile(filename);
LoadConfigNode(node_, config_);
} catch (YAML::Exception& e) {
SERVER_LOG_ERROR << "Failed to load config file: " << std::string(e.what());
return SERVER_UNEXPECTED_ERROR;
std::string str = "Exception: load config file fail: " + std::string(e.what());
return Status(SERVER_UNEXPECTED_ERROR, str);
}
return SERVER_SUCCESS;
return Status::OK();
}
void

View File

@ -17,27 +17,35 @@
#pragma once
#include "ConfigMgr.h"
#include "ConfigNode.h"
#include "utils/Error.h"
#include <yaml-cpp/yaml.h>
#include <string>
#include "ConfigMgr.h"
#include "utils/Status.h"
namespace milvus {
namespace server {
class YamlConfigMgr : public ConfigMgr {
public:
virtual ErrorCode
static ConfigMgr*
GetInstance() {
static YamlConfigMgr mgr;
return &mgr;
}
virtual Status
LoadConfigFile(const std::string& filename);
virtual void
Print() const;
virtual std::string
DumpString() const;
virtual const ConfigNode&
GetRootNode() const;
virtual ConfigNode&
GetRootNode();

View File

@ -251,11 +251,6 @@ DBImpl::InsertVectors(const std::string& table_id, uint64_t n, const float* vect
Status status;
milvus::server::CollectInsertMetrics metrics(n, status);
status = mem_mgr_->InsertVectors(table_id, n, vectors, vector_ids);
// std::chrono::microseconds time_span =
// std::chrono::duration_cast<std::chrono::microseconds>(end_time - start_time);
// double average_time = double(time_span.count()) / n;
// ENGINE_LOG_DEBUG << "Insert vectors to cache finished";
return status;
}
@ -359,7 +354,7 @@ DBImpl::Query(const std::string& table_id, uint64_t k, uint64_t nq, uint64_t npr
return Status(DB_ERROR, "Milsvus server is shutdown!");
}
ENGINE_LOG_DEBUG << "Query by dates for table: " << table_id;
ENGINE_LOG_DEBUG << "Query by dates for table: " << table_id << " date range count: " << dates.size();
// get all table files from table
meta::DatePartionedTableFilesSchema files;
@ -377,7 +372,7 @@ DBImpl::Query(const std::string& table_id, uint64_t k, uint64_t nq, uint64_t npr
}
cache::CpuCacheMgr::GetInstance()->PrintInfo(); // print cache info before query
status = QueryAsync(table_id, file_id_array, k, nq, nprobe, vectors, dates, results);
status = QueryAsync(table_id, file_id_array, k, nq, nprobe, vectors, results);
cache::CpuCacheMgr::GetInstance()->PrintInfo(); // print cache info after query
return status;
}
@ -389,7 +384,7 @@ DBImpl::Query(const std::string& table_id, const std::vector<std::string>& file_
return Status(DB_ERROR, "Milsvus server is shutdown!");
}
ENGINE_LOG_DEBUG << "Query by file ids for table: " << table_id;
ENGINE_LOG_DEBUG << "Query by file ids for table: " << table_id << " date range count: " << dates.size();
// get specified files
std::vector<size_t> ids;
@ -418,7 +413,7 @@ DBImpl::Query(const std::string& table_id, const std::vector<std::string>& file_
}
cache::CpuCacheMgr::GetInstance()->PrintInfo(); // print cache info before query
status = QueryAsync(table_id, file_id_array, k, nq, nprobe, vectors, dates, results);
status = QueryAsync(table_id, file_id_array, k, nq, nprobe, vectors, results);
cache::CpuCacheMgr::GetInstance()->PrintInfo(); // print cache info after query
return status;
}
@ -437,14 +432,13 @@ DBImpl::Size(uint64_t& result) {
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
Status
DBImpl::QueryAsync(const std::string& table_id, const meta::TableFilesSchema& files, uint64_t k, uint64_t nq,
uint64_t nprobe, const float* vectors, const meta::DatesT& dates, QueryResults& results) {
uint64_t nprobe, const float* vectors, QueryResults& results) {
server::CollectQueryMetrics metrics(nq);
TimeRecorder rc("");
// step 1: get files to search
ENGINE_LOG_DEBUG << "Engine query begin, index file count: " << files.size()
<< " date range count: " << dates.size();
ENGINE_LOG_DEBUG << "Engine query begin, index file count: " << files.size();
scheduler::SearchJobPtr job = std::make_shared<scheduler::SearchJob>(0, k, nq, nprobe, vectors);
for (auto& file : files) {
scheduler::TableFileSchemaPtr file_ptr = std::make_shared<meta::TableFileSchema>(file);
@ -458,32 +452,7 @@ DBImpl::QueryAsync(const std::string& table_id, const meta::TableFilesSchema& fi
return job->GetStatus();
}
// step 3: print time cost information
// double load_cost = context->LoadCost();
// double search_cost = context->SearchCost();
// double reduce_cost = context->ReduceCost();
// std::string load_info = TimeRecorder::GetTimeSpanStr(load_cost);
// std::string search_info = TimeRecorder::GetTimeSpanStr(search_cost);
// std::string reduce_info = TimeRecorder::GetTimeSpanStr(reduce_cost);
// if(search_cost > 0.0 || reduce_cost > 0.0) {
// double total_cost = load_cost + search_cost + reduce_cost;
// double load_percent = load_cost/total_cost;
// double search_percent = search_cost/total_cost;
// double reduce_percent = reduce_cost/total_cost;
//
// ENGINE_LOG_DEBUG << "Engine load index totally cost: " << load_info
// << " percent: " << load_percent*100 << "%";
// ENGINE_LOG_DEBUG << "Engine search index totally cost: " << search_info
// << " percent: " << search_percent*100 << "%";
// ENGINE_LOG_DEBUG << "Engine reduce topk totally cost: " << reduce_info
// << " percent: " << reduce_percent*100 << "%";
// } else {
// ENGINE_LOG_DEBUG << "Engine load cost: " << load_info
// << " search cost: " << search_info
// << " reduce cost: " << reduce_info;
// }
// step 4: construct results
// step 3: construct results
results = job->GetResult();
rc.ElapseFromBegin("Engine query totally cost");
@ -540,7 +509,13 @@ DBImpl::StartMetricTask() {
server::Metrics::GetInstance().KeepingAliveCounterIncrement(METRIC_ACTION_INTERVAL);
int64_t cache_usage = cache::CpuCacheMgr::GetInstance()->CacheUsage();
int64_t cache_total = cache::CpuCacheMgr::GetInstance()->CacheCapacity();
server::Metrics::GetInstance().CpuCacheUsageGaugeSet(cache_usage * 100 / cache_total);
if (cache_total > 0) {
double cache_usage_double = cache_usage;
server::Metrics::GetInstance().CpuCacheUsageGaugeSet(cache_usage_double * 100 / cache_total);
} else {
server::Metrics::GetInstance().CpuCacheUsageGaugeSet(0);
}
server::Metrics::GetInstance().GpuCacheUsageGaugeSet();
uint64_t size;
Size(size);
@ -695,14 +670,13 @@ DBImpl::BackgroundMergeFiles(const std::string& table_id) {
return status;
}
bool has_merge = false;
for (auto& kv : raw_files) {
auto files = kv.second;
if (files.size() < options_.merge_trigger_number_) {
ENGINE_LOG_DEBUG << "Files number not greater equal than merge trigger number, skip merge action";
continue;
}
has_merge = true;
MergeFiles(table_id, kv.first, kv.second);
if (shutting_down_.load(std::memory_order_acquire)) {
@ -770,127 +744,6 @@ DBImpl::StartBuildIndexTask(bool force) {
}
}
// Builds an index for the table file `file` and records the outcome in meta.
//
// Flow, as implemented below:
//   1. create an execution engine from `file` and load it;
//   2. create a new table file of type NEW_INDEX through meta;
//   3. build the index into the new file's location;
//   4. if the owning table no longer exists, delete its files and stop;
//   5. serialize the new index;
//   6. in a single meta update, mark the new file INDEX and the source file BACKUP.
//
// Returns:
//   - Status(DB_ERROR, ...) when the engine cannot be created, or when building or
//     serializing throws;
//   - the failing Status from Load() or CreateTableFile();
//   - Status::OK() otherwise. See the NOTE(review) remarks below for paths that can
//     return OK although no usable index was produced.
Status
DBImpl::BuildIndex(const meta::TableFileSchema& file) {
ExecutionEnginePtr to_index = EngineFactory::Build(file.dimension_, file.location_, (EngineType)file.engine_type_,
(MetricType)file.metric_type_, file.nlist_);
if (to_index == nullptr) {
ENGINE_LOG_ERROR << "Invalid engine type";
return Status(DB_ERROR, "Invalid engine type");
}
try {
// step 1: load index
Status status = to_index->Load(options_.insert_cache_immediately_);
if (!status.ok()) {
ENGINE_LOG_ERROR << "Failed to load index file: " << status.ToString();
return status;
}
// step 2: create table file
// The new file inherits table id and date from the source file.
meta::TableFileSchema table_file;
table_file.table_id_ = file.table_id_;
table_file.date_ = file.date_;
table_file.file_type_ =
meta::TableFileSchema::NEW_INDEX; // for multi-db-path, distribute index file averagely to each path
status = meta_ptr_->CreateTableFile(table_file);
if (!status.ok()) {
ENGINE_LOG_ERROR << "Failed to create table file: " << status.ToString();
return status;
}
// step 3: build index
std::shared_ptr<ExecutionEngine> index;
try {
server::CollectBuildIndexMetrics metrics;
index = to_index->BuildIndex(table_file.location_, (EngineType)table_file.engine_type_);
if (index == nullptr) {
// No index was produced: flag the new file for deletion.
// NOTE(review): the value returned here is the result of UpdateTableFile, so the
// caller may receive OK even though the build failed - confirm this is intended.
table_file.file_type_ = meta::TableFileSchema::TO_DELETE;
status = meta_ptr_->UpdateTableFile(table_file);
ENGINE_LOG_DEBUG << "Failed to update file to index, mark file: " << table_file.file_id_
<< " to to_delete";
return status;
}
} catch (std::exception& ex) {
// typical error: out of gpu memory
std::string msg = "BuildIndex encounter exception: " + std::string(ex.what());
ENGINE_LOG_ERROR << msg;
table_file.file_type_ = meta::TableFileSchema::TO_DELETE;
status = meta_ptr_->UpdateTableFile(table_file);
ENGINE_LOG_DEBUG << "Failed to update file to index, mark file: " << table_file.file_id_ << " to to_delete";
std::cout << "ERROR: failed to build index, index file is too large or gpu memory is not enough"
<< std::endl;
return Status(DB_ERROR, msg);
}
// step 4: if table has been deleted, dont save index file
// NOTE(review): the Status returned by HasTable is not checked; has_table stays false
// if the call does not set it.
bool has_table = false;
meta_ptr_->HasTable(file.table_id_, has_table);
if (!has_table) {
meta_ptr_->DeleteTableFiles(file.table_id_);
return Status::OK();
}
// step 5: save index file
try {
index->Serialize();
} catch (std::exception& ex) {
// typical error: out of disk space or permission denied
std::string msg = "Serialize index encounter exception: " + std::string(ex.what());
ENGINE_LOG_ERROR << msg;
table_file.file_type_ = meta::TableFileSchema::TO_DELETE;
status = meta_ptr_->UpdateTableFile(table_file);
ENGINE_LOG_DEBUG << "Failed to update file to index, mark file: " << table_file.file_id_ << " to to_delete";
std::cout << "ERROR: failed to persist index file: " << table_file.location_
<< ", possible out of disk space" << std::endl;
return Status(DB_ERROR, msg);
}
// step 6: update meta
// The new file becomes INDEX and a copy of the source file is marked BACKUP; both
// records are submitted together through UpdateTableFiles.
table_file.file_type_ = meta::TableFileSchema::INDEX;
table_file.file_size_ = index->PhysicalSize();
table_file.row_count_ = index->Count();
auto origin_file = file;
origin_file.file_type_ = meta::TableFileSchema::BACKUP;
meta::TableFilesSchema update_files = {table_file, origin_file};
status = meta_ptr_->UpdateTableFiles(update_files);
if (status.ok()) {
ENGINE_LOG_DEBUG << "New index file " << table_file.file_id_ << " of size " << index->PhysicalSize()
<< " bytes"
<< " from file " << origin_file.file_id_;
if (options_.insert_cache_immediately_) {
index->Cache();
}
} else {
// failed to update meta, mark the new file as to_delete, don't delete old file
// NOTE(review): the statuses of these two rollback updates are overwritten and not
// returned; control falls through to the final `return Status::OK()`, so the caller
// is not told that the meta update failed - confirm this is intended.
origin_file.file_type_ = meta::TableFileSchema::TO_INDEX;
status = meta_ptr_->UpdateTableFile(origin_file);
ENGINE_LOG_DEBUG << "Failed to update file to index, mark file: " << origin_file.file_id_ << " to to_index";
table_file.file_type_ = meta::TableFileSchema::TO_DELETE;
status = meta_ptr_->UpdateTableFile(table_file);
ENGINE_LOG_DEBUG << "Failed to update file to index, mark file: " << table_file.file_id_ << " to to_delete";
}
} catch (std::exception& ex) {
// Catches any std::exception that escaped the steps above.
std::string msg = "Build index encounter exception: " + std::string(ex.what());
ENGINE_LOG_ERROR << msg;
return Status(DB_ERROR, msg);
}
return Status::OK();
}
void
DBImpl::BackgroundBuildIndex() {
ENGINE_LOG_TRACE << "Background build index thread start";
@ -915,17 +768,6 @@ DBImpl::BackgroundBuildIndex() {
ENGINE_LOG_ERROR << "Building index failed: " << status.ToString();
}
}
// for (auto &file : to_index_files) {
// status = BuildIndex(file);
// if (!status.ok()) {
// ENGINE_LOG_ERROR << "Building index for " << file.id_ << " failed: " << status.ToString();
// }
//
// if (shutting_down_.load(std::memory_order_acquire)) {
// ENGINE_LOG_DEBUG << "Server will shutdown, skip build index action";
// break;
// }
// }
ENGINE_LOG_TRACE << "Background build index thread exit";
}

View File

@ -107,7 +107,7 @@ class DBImpl : public DB {
private:
Status
QueryAsync(const std::string& table_id, const meta::TableFilesSchema& files, uint64_t k, uint64_t nq,
uint64_t nprobe, const float* vectors, const meta::DatesT& dates, QueryResults& results);
uint64_t nprobe, const float* vectors, QueryResults& results);
void
BackgroundTimerTask();
@ -133,9 +133,6 @@ class DBImpl : public DB {
void
BackgroundBuildIndex();
Status
BuildIndex(const meta::TableFileSchema&);
Status
MemSerialize();

View File

@ -80,8 +80,7 @@ class ExecutionEngine {
Merge(const std::string& location) = 0;
virtual Status
Search(int64_t n, const float* data, int64_t k, int64_t nprobe, float* distances, int64_t* labels,
bool hybrid) const = 0;
Search(int64_t n, const float* data, int64_t k, int64_t nprobe, float* distances, int64_t* labels, bool hybrid) = 0;
virtual std::shared_ptr<ExecutionEngine>
BuildIndex(const std::string& location, EngineType engine_type) = 0;

View File

@ -37,6 +37,7 @@
#include <utility>
#include <vector>
//#define ON_SEARCH
namespace milvus {
namespace engine {
@ -248,26 +249,6 @@ ExecutionEngineImpl::Load(bool to_cache) {
Status
ExecutionEngineImpl::CopyToGpu(uint64_t device_id, bool hybrid) {
if (hybrid) {
auto key = location_ + ".quantizer";
auto quantizer =
std::static_pointer_cast<CachedQuantizer>(cache::GpuCacheMgr::GetInstance(device_id)->GetIndex(key));
auto conf = std::make_shared<knowhere::QuantizerCfg>();
conf->gpu_id = device_id;
if (quantizer) {
// cache hit
conf->mode = 2;
auto new_index = index_->LoadData(quantizer->Data(), conf);
index_ = new_index;
} else {
auto pair = index_->CopyToGpuWithQuantizer(device_id);
index_ = pair.first;
// cache
auto cached_quantizer = std::make_shared<CachedQuantizer>(pair.second);
cache::GpuCacheMgr::GetInstance(device_id)->InsertItem(key, cached_quantizer);
}
return Status::OK();
}
@ -415,7 +396,60 @@ ExecutionEngineImpl::BuildIndex(const std::string& location, EngineType engine_t
Status
ExecutionEngineImpl::Search(int64_t n, const float* data, int64_t k, int64_t nprobe, float* distances, int64_t* labels,
bool hybrid) const {
bool hybrid) {
if (index_type_ == EngineType::FAISS_IVFSQ8H) {
if (!hybrid) {
const std::string key = location_ + ".quantizer";
std::vector<uint64_t> gpus = scheduler::get_gpu_pool();
const int64_t NOT_FOUND = -1;
int64_t device_id = NOT_FOUND;
// cache hit
{
knowhere::QuantizerPtr quantizer = nullptr;
for (auto& gpu : gpus) {
auto cache = cache::GpuCacheMgr::GetInstance(gpu);
if (auto cached_quantizer = cache->GetIndex(key)) {
device_id = gpu;
quantizer = std::static_pointer_cast<CachedQuantizer>(cached_quantizer)->Data();
}
}
if (device_id != NOT_FOUND) {
// cache hit
auto config = std::make_shared<knowhere::QuantizerCfg>();
config->gpu_id = device_id;
config->mode = 2;
auto new_index = index_->LoadData(quantizer, config);
index_ = new_index;
}
}
if (device_id == NOT_FOUND) {
// cache miss
std::vector<int64_t> all_free_mem;
for (auto& gpu : gpus) {
auto cache = cache::GpuCacheMgr::GetInstance(gpu);
auto free_mem = cache->CacheCapacity() - cache->CacheUsage();
all_free_mem.push_back(free_mem);
}
auto max_e = std::max_element(all_free_mem.begin(), all_free_mem.end());
auto best_index = std::distance(all_free_mem.begin(), max_e);
device_id = gpus[best_index];
auto pair = index_->CopyToGpuWithQuantizer(device_id);
index_ = pair.first;
// cache
auto cached_quantizer = std::make_shared<CachedQuantizer>(pair.second);
cache::GpuCacheMgr::GetInstance(device_id)->InsertItem(key, cached_quantizer);
}
}
}
if (index_ == nullptr) {
ENGINE_LOG_ERROR << "ExecutionEngineImpl: index is null, failed to search";
return Status(DB_ERROR, "index is null");

View File

@ -72,7 +72,7 @@ class ExecutionEngineImpl : public ExecutionEngine {
Status
Search(int64_t n, const float* data, int64_t k, int64_t nprobe, float* distances, int64_t* labels,
bool hybrid = false) const override;
bool hybrid = false) override;
ExecutionEnginePtr
BuildIndex(const std::string& location, EngineType engine_type) override;

View File

@ -88,6 +88,11 @@ function(ExternalProject_Create_Cache project_name package_file install_path cac
file(REMOVE ${package_file})
endif()
string(REGEX REPLACE "(.+)/.+$" "\\1" package_dir ${package_file})
if(NOT EXISTS ${package_dir})
file(MAKE_DIRECTORY ${package_dir})
endif()
message(STATUS "Will create cached package file: ${package_file}")
ExternalProject_Add_Step(${project_name} package

View File

@ -233,17 +233,16 @@ foreach(_VERSION_ENTRY ${TOOLCHAIN_VERSIONS_TXT})
endforeach()
if(CUSTOMIZATION)
set(FAISS_SOURCE_URL "http://192.168.1.105:6060/jinhai/faiss/-/archive/${FAISS_VERSION}/faiss-${FAISS_VERSION}.tar.gz")
# set(FAISS_MD5 "a589663865a8558205533c8ac414278c")
# set(FAISS_MD5 "57da9c4f599cc8fa4260488b1c96e1cc") # commit-id 6dbdf75987c34a2c853bd172ea0d384feea8358c branch-0.2.0
# set(FAISS_MD5 "21deb1c708490ca40ecb899122c01403") # commit-id 643e48f479637fd947e7b93fa4ca72b38ecc9a39 branch-0.2.0
# set(FAISS_MD5 "072db398351cca6e88f52d743bbb9fa0") # commit-id 3a2344d04744166af41ef1a74449d68a315bfe17 branch-0.2.1
set(FAISS_MD5 "c89ea8e655f5cdf58f42486f13614714") # commit-id 9c28a1cbb88f41fa03b03d7204106201ad33276b branch-0.2.1
execute_process(COMMAND wget -q --method HEAD ${FAISS_SOURCE_URL} RESULT_VARIABLE return_code)
message(STATUS "Check the remote cache file ${FAISS_SOURCE_URL}. return code = ${return_code}")
execute_process(COMMAND wget -q --method HEAD ${FAISS_URL} RESULT_VARIABLE return_code)
message(STATUS "Check the remote cache file ${FAISS_URL}. return code = ${return_code}")
if (NOT return_code EQUAL 0)
set(FAISS_SOURCE_URL "https://github.com/facebookresearch/faiss/archive/v1.5.3.tar.gz")
MESSAGE(FATAL_ERROR "Can't access to ${FAISS_URL}")
else()
# set(FAISS_MD5 "a589663865a8558205533c8ac414278c")
# set(FAISS_MD5 "57da9c4f599cc8fa4260488b1c96e1cc") # commit-id 6dbdf75987c34a2c853bd172ea0d384feea8358c branch-0.2.0
# set(FAISS_MD5 "21deb1c708490ca40ecb899122c01403") # commit-id 643e48f479637fd947e7b93fa4ca72b38ecc9a39 branch-0.2.0
# set(FAISS_MD5 "072db398351cca6e88f52d743bbb9fa0") # commit-id 3a2344d04744166af41ef1a74449d68a315bfe17 branch-0.2.1
set(FAISS_MD5 "c89ea8e655f5cdf58f42486f13614714") # commit-id 9c28a1cbb88f41fa03b03d7204106201ad33276b branch-0.2.1
endif()
else()
set(FAISS_SOURCE_URL "https://github.com/facebookresearch/faiss/archive/v1.5.3.tar.gz")

View File

@ -1,7 +1,3 @@
set(TBB_DIR ${CORE_SOURCE_DIR}/thirdparty/tbb)
set(TBB_LIBRARIES ${TBB_DIR}/libtbb.so)
include_directories(${TBB_DIR}/include)
include_directories(${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES})
link_directories(${CUDA_TOOLKIT_ROOT_DIR}/lib64)
@ -60,7 +56,6 @@ set(index_srcs
set(depend_libs
SPTAGLibStatic
${TBB_LIBRARIES}
faiss
openblas
lapack
@ -107,13 +102,6 @@ INSTALL(FILES ${OPENBLAS_REAL_STATIC_LIB}
DESTINATION lib
)
INSTALL(FILES ${CORE_SOURCE_DIR}/thirdparty/tbb/libtbb.so.2
DESTINATION lib
)
INSTALL(FILES ${CORE_SOURCE_DIR}/thirdparty/tbb/libtbb.so
DESTINATION lib
)
set(CORE_INCLUDE_DIRS
${CORE_SOURCE_DIR}/knowhere
${CORE_SOURCE_DIR}/thirdparty
@ -122,7 +110,6 @@ set(CORE_INCLUDE_DIRS
${FAISS_INCLUDE_DIR}
${OPENBLAS_INCLUDE_DIR}
${LAPACK_INCLUDE_DIR}
${CORE_SOURCE_DIR}/thirdparty/tbb/include
)
set(CORE_INCLUDE_DIRS ${CORE_INCLUDE_DIRS} PARENT_SCOPE)
@ -132,7 +119,6 @@ set(CORE_INCLUDE_DIRS ${CORE_INCLUDE_DIRS} PARENT_SCOPE)
# ${ARROW_INCLUDE_DIR}/arrow
# ${FAISS_PREFIX}/include/faiss
# ${OPENBLAS_INCLUDE_DIR}/
# ${CORE_SOURCE_DIR}/thirdparty/tbb/include/tbb
# DESTINATION
# include)
#

View File

@ -96,7 +96,7 @@ IVFSQHybrid::CopyCpuToGpu(const int64_t& device_id, const Config& config) {
auto new_idx = std::make_shared<IVFSQHybrid>(device_index, device_id, res);
return new_idx;
} else {
KNOWHERE_THROW_MSG("CopyCpuToGpu Error, can't get gpu_resource");
KNOWHERE_THROW_MSG("CopyCpuToGpu Error, can't get gpu: " + std::to_string(gpu_id_) + "resource");
}
}
@ -152,7 +152,7 @@ IVFSQHybrid::LoadQuantizer(const Config& conf) {
gpu_mode = 1;
return q;
} else {
KNOWHERE_THROW_MSG("CopyCpuToGpu Error, can't get gpu_resource");
KNOWHERE_THROW_MSG("CopyCpuToGpu Error, can't get gpu: " + std::to_string(gpu_id_) + "resource");
}
}
@ -215,7 +215,7 @@ IVFSQHybrid::LoadData(const knowhere::QuantizerPtr& q, const Config& conf) {
auto sq_idx = std::make_shared<IVFSQHybrid>(new_idx, gpu_id_, res);
return sq_idx;
} else {
KNOWHERE_THROW_MSG("CopyCpuToGpu Error, can't get gpu_resource");
KNOWHERE_THROW_MSG("CopyCpuToGpu Error, can't get gpu: " + std::to_string(gpu_id_) + "resource");
}
}
@ -242,7 +242,7 @@ IVFSQHybrid::CopyCpuToGpuWithQuantizer(const int64_t& device_id, const Config& c
q->size = index_composition.quantizer->d * index_composition.quantizer->getNumVecs() * sizeof(float);
return std::make_pair(new_idx, q);
} else {
KNOWHERE_THROW_MSG("CopyCpuToGpu Error, can't get gpu_resource");
KNOWHERE_THROW_MSG("CopyCpuToGpu Error, can't get gpu: " + std::to_string(gpu_id_) + "resource");
}
}

View File

@ -36,42 +36,47 @@ BinarySet
CPUKDTRNG::Serialize() {
// Builds a BinarySet from the blobs written by SaveIndexToMemory, stored under
// the names "samples", "tree", "graph" and "metadata" (blob indices 0..3).
std::vector<void*> index_blobs;
std::vector<int64_t> index_len;
index_ptr_->SaveIndexToMemory(index_blobs, index_len);
// TODO(zirui): dev
// index_ptr_->SaveIndexToMemory(index_blobs, index_len);
BinarySet binary_set;
// NOTE(review): each shared_ptr below is created with make_shared<uint8_t>()
// and then immediately reset() to a blob pointer, so the first allocation is
// discarded and the blob is later released with plain `delete` -- confirm that
// this matches how SaveIndexToMemory allocates the blobs.
auto sample = std::make_shared<uint8_t>();
sample.reset(static_cast<uint8_t*>(index_blobs[0]));
auto tree = std::make_shared<uint8_t>();
tree.reset(static_cast<uint8_t*>(index_blobs[1]));
auto graph = std::make_shared<uint8_t>();
graph.reset(static_cast<uint8_t*>(index_blobs[2]));
auto metadata = std::make_shared<uint8_t>();
metadata.reset(static_cast<uint8_t*>(index_blobs[3]));
// Each entry is appended together with the length reported for the same index.
binary_set.Append("samples", sample, index_len[0]);
binary_set.Append("tree", tree, index_len[1]);
binary_set.Append("graph", graph, index_len[2]);
binary_set.Append("metadata", metadata, index_len[3]);
//
// auto sample = std::make_shared<uint8_t>();
// sample.reset(static_cast<uint8_t*>(index_blobs[0]));
// auto tree = std::make_shared<uint8_t>();
// tree.reset(static_cast<uint8_t*>(index_blobs[1]));
// auto graph = std::make_shared<uint8_t>();
// graph.reset(static_cast<uint8_t*>(index_blobs[2]));
// auto metadata = std::make_shared<uint8_t>();
// metadata.reset(static_cast<uint8_t*>(index_blobs[3]));
//
// binary_set.Append("samples", sample, index_len[0]);
// binary_set.Append("tree", tree, index_len[1]);
// binary_set.Append("graph", graph, index_len[2]);
// binary_set.Append("metadata", metadata, index_len[3]);
// NOTE(review): in the commented-out form above nothing is appended, so the
// returned BinarySet is empty -- confirm callers tolerate that.
return binary_set;
}
void
CPUKDTRNG::Load(const BinarySet& binary_set) {
// Looks up the "samples", "tree", "graph" and "metadata" entries of binary_set
// and hands their raw data pointers, in that order, to LoadIndexFromMemory.
std::vector<void*> index_blobs;
// TODO(zirui): dev
// NOTE(review): the result of GetByName() is dereferenced without a check --
// presumably a missing entry is not expected here; confirm against callers.
auto samples = binary_set.GetByName("samples");
index_blobs.push_back(samples->data.get());
auto tree = binary_set.GetByName("tree");
index_blobs.push_back(tree->data.get());
auto graph = binary_set.GetByName("graph");
index_blobs.push_back(graph->data.get());
auto metadata = binary_set.GetByName("metadata");
index_blobs.push_back(metadata->data.get());
index_ptr_->LoadIndexFromMemory(index_blobs);
// std::vector<void*> index_blobs;
//
// auto samples = binary_set.GetByName("samples");
// index_blobs.push_back(samples->data.get());
//
// auto tree = binary_set.GetByName("tree");
// index_blobs.push_back(tree->data.get());
//
// auto graph = binary_set.GetByName("graph");
// index_blobs.push_back(graph->data.get());
//
// auto metadata = binary_set.GetByName("metadata");
// index_blobs.push_back(metadata->data.get());
//
// index_ptr_->LoadIndexFromMemory(index_blobs);
// NOTE(review): in the commented-out form above binary_set is never read, so
// Load leaves the index untouched -- confirm this is intended while in dev.
}
// PreprocessorPtr

View File

@ -89,5 +89,3 @@ dkms.conf
/Wrappers/inc/AnnClient.java
/AnnService.users - Copy.props
/.vs
Release/
Debug/

View File

@ -1,44 +1,41 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
# Core library headers: Core, Core/Common, Core/BKT, Core/KDT and Helper.
file(GLOB HDR_FILES ${PROJECT_SOURCE_DIR}/AnnService/inc/Core/*.h ${PROJECT_SOURCE_DIR}/AnnService/inc/Core/Common/*.h ${PROJECT_SOURCE_DIR}/AnnService/inc/Core/BKT/*.h ${PROJECT_SOURCE_DIR}/AnnService/inc/Core/KDT/*.h ${PROJECT_SOURCE_DIR}/AnnService/inc/Helper/*.h)
# Core library sources from the matching src/ directories.
file(GLOB SRC_FILES ${PROJECT_SOURCE_DIR}/AnnService/src/Core/*.cpp ${PROJECT_SOURCE_DIR}/AnnService/src/Core/Common/*.cpp ${PROJECT_SOURCE_DIR}/AnnService/src/Core/BKT/*.cpp ${PROJECT_SOURCE_DIR}/AnnService/src/Core/KDT/*.cpp ${PROJECT_SOURCE_DIR}/AnnService/src/Helper/*.cpp)
# Same header glob, additionally picking up Helper/VectorSetReaders.
file(GLOB HDR_FILES ${PROJECT_SOURCE_DIR}/AnnService/inc/Core/*.h ${PROJECT_SOURCE_DIR}/AnnService/inc/Core/Common/*.h ${PROJECT_SOURCE_DIR}/AnnService/inc/Core/BKT/*.h ${PROJECT_SOURCE_DIR}/AnnService/inc/Core/KDT/*.h ${PROJECT_SOURCE_DIR}/AnnService/inc/Helper/*.h ${PROJECT_SOURCE_DIR}/AnnService/inc/Helper/VectorSetReaders/*.h)
# Same source glob, additionally picking up Helper/VectorSetReaders.
file(GLOB SRC_FILES ${PROJECT_SOURCE_DIR}/AnnService/src/Core/*.cpp ${PROJECT_SOURCE_DIR}/AnnService/src/Core/Common/*.cpp ${PROJECT_SOURCE_DIR}/AnnService/src/Core/BKT/*.cpp ${PROJECT_SOURCE_DIR}/AnnService/src/Core/KDT/*.cpp ${PROJECT_SOURCE_DIR}/AnnService/src/Helper/*.cpp ${PROJECT_SOURCE_DIR}/AnnService/src/Helper/VectorSetReaders/*.cpp)
# Directory-scoped include path; it applies to the targets in this CMakeLists file.
include_directories(${PROJECT_SOURCE_DIR}/AnnService)
# Shared build of the core library.
add_library (SPTAGLib SHARED ${SRC_FILES} ${HDR_FILES})
target_link_libraries (SPTAGLib ${TBB_LIBRARIES})
target_link_libraries (SPTAGLib)
# Static build from the same file lists; OUTPUT_NAME gives the archive the
# same base name as the shared library.
add_library (SPTAGLibStatic STATIC ${SRC_FILES} ${HDR_FILES})
set_target_properties(SPTAGLibStatic PROPERTIES OUTPUT_NAME SPTAGLib)
# server: core files plus the Server and Socket directories.
file(GLOB SERVER_HDR_FILES ${HDR_FILES} ${PROJECT_SOURCE_DIR}/AnnService/inc/Server/*.h ${PROJECT_SOURCE_DIR}/AnnService/inc/Socket/*.h)
file(GLOB SERVER_FILES ${SRC_FILES} ${PROJECT_SOURCE_DIR}/AnnService/src/Server/*.cpp ${PROJECT_SOURCE_DIR}/AnnService/src/Socket/*.cpp)
add_executable (server ${SERVER_FILES} ${SERVER_HDR_FILES})
target_link_libraries(server ${Boost_LIBRARIES} ${TBB_LIBRARIES})
target_link_libraries(server ${Boost_LIBRARIES})
# client: core files plus the Client and Socket directories.
file(GLOB CLIENT_HDR_FILES ${HDR_FILES} ${PROJECT_SOURCE_DIR}/AnnService/inc/Client/*.h ${PROJECT_SOURCE_DIR}/AnnService/inc/Socket/*.h)
file(GLOB CLIENT_FILES ${SRC_FILES} ${PROJECT_SOURCE_DIR}/AnnService/src/Client/*.cpp ${PROJECT_SOURCE_DIR}/AnnService/src/Socket/*.cpp)
add_executable (client ${CLIENT_FILES} ${CLIENT_HDR_FILES})
target_link_libraries(client ${Boost_LIBRARIES} ${TBB_LIBRARIES})
target_link_libraries(client ${Boost_LIBRARIES})
# aggregator: core files plus the Aggregator and Socket directories.
file(GLOB AGG_HDR_FILES ${HDR_FILES} ${PROJECT_SOURCE_DIR}/AnnService/inc/Aggregator/*.h ${PROJECT_SOURCE_DIR}/AnnService/inc/Socket/*.h)
file(GLOB AGG_FILES ${SRC_FILES} ${PROJECT_SOURCE_DIR}/AnnService/src/Aggregator/*.cpp ${PROJECT_SOURCE_DIR}/AnnService/src/Socket/*.cpp)
add_executable (aggregator ${AGG_FILES} ${AGG_HDR_FILES})
target_link_libraries(aggregator ${Boost_LIBRARIES} ${TBB_LIBRARIES})
target_link_libraries(aggregator ${Boost_LIBRARIES})
# indexbuilder: core files plus the IndexBuilder directory (the first pair of
# globs also takes IndexBuilder/VectorSetReaders, the second pair does not).
file(GLOB BUILDER_HDR_FILES ${HDR_FILES} ${PROJECT_SOURCE_DIR}/AnnService/inc/IndexBuilder/*.h ${PROJECT_SOURCE_DIR}/AnnService/inc/IndexBuilder/VectorSetReaders/*.h)
file(GLOB BUILDER_FILES ${SRC_FILES} ${PROJECT_SOURCE_DIR}/AnnService/src/IndexBuilder/*.cpp ${PROJECT_SOURCE_DIR}/AnnService/src/IndexBuilder/VectorSetReaders/*.cpp)
file(GLOB BUILDER_HDR_FILES ${HDR_FILES} ${PROJECT_SOURCE_DIR}/AnnService/inc/IndexBuilder/*.h)
file(GLOB BUILDER_FILES ${SRC_FILES} ${PROJECT_SOURCE_DIR}/AnnService/src/IndexBuilder/*.cpp)
add_executable (indexbuilder ${BUILDER_FILES} ${BUILDER_HDR_FILES})
target_link_libraries(indexbuilder ${Boost_LIBRARIES} ${TBB_LIBRARIES})
target_link_libraries(indexbuilder ${Boost_LIBRARIES})
# indexsearcher: core files plus the IndexSearcher sources; it reuses HDR_FILES.
file(GLOB SEARCHER_FILES ${SRC_FILES} ${PROJECT_SOURCE_DIR}/AnnService/src/IndexSearcher/*.cpp)
add_executable (indexsearcher ${SEARCHER_FILES} ${HDR_FILES})
target_link_libraries(indexsearcher ${Boost_LIBRARIES} ${TBB_LIBRARIES})
target_link_libraries(indexsearcher ${Boost_LIBRARIES})
# Install rules: executables go to bin, static and shared libraries to lib.
install(TARGETS SPTAGLib SPTAGLibStatic server client aggregator indexbuilder indexsearcher
RUNTIME DESTINATION bin
ARCHIVE DESTINATION lib
LIBRARY DESTINATION lib)
# Install only the .h files found under inc/ into include/sptag.
install(DIRECTORY inc DESTINATION include/sptag
FILES_MATCHING PATTERN "*.h")

View File

@ -149,25 +149,29 @@
<ClInclude Include="inc\Core\DefinitionList.h" />
<ClInclude Include="inc\Core\MetadataSet.h" />
<ClInclude Include="inc\Core\SearchQuery.h" />
<ClInclude Include="inc\Core\SearchResult.h" />
<ClInclude Include="inc\Core\VectorIndex.h" />
<ClInclude Include="inc\Core\VectorSet.h" />
<ClInclude Include="inc\Helper\ArgumentsParser.h" />
<ClInclude Include="inc\Helper\Base64Encode.h" />
<ClInclude Include="inc\Helper\BufferStream.h" />
<ClInclude Include="inc\Helper\CommonHelper.h" />
<ClInclude Include="inc\Helper\Concurrent.h" />
<ClInclude Include="inc\Helper\ConcurrentSet.h" />
<ClInclude Include="inc\Helper\SimpleIniReader.h" />
<ClInclude Include="inc\Helper\StringConvert.h" />
<ClInclude Include="inc\Core\Common\NeighborhoodGraph.h" />
<ClInclude Include="inc\Core\Common\RelativeNeighborhoodGraph.h" />
<ClInclude Include="inc\Core\Common\BKTree.h" />
<ClInclude Include="inc\Core\Common\KDTree.h" />
<ClInclude Include="inc\Helper\VectorSetReader.h" />
<ClInclude Include="inc\Helper\VectorSetReaders\DefaultReader.h" />
</ItemGroup>
<ItemGroup>
<ClCompile Include="src\Core\BKT\BKTIndex.cpp" />
<ClCompile Include="src\Core\Common\NeighborhoodGraph.cpp" />
<ClCompile Include="src\Core\KDT\KDTIndex.cpp" />
<ClCompile Include="src\Core\Common\WorkSpacePool.cpp" />
<ClCompile Include="src\Core\CommonDataStructure.cpp" />
<ClCompile Include="src\Core\MetadataSet.cpp" />
<ClCompile Include="src\Core\VectorIndex.cpp" />
<ClCompile Include="src\Core\VectorSet.cpp" />
@ -176,18 +180,13 @@
<ClCompile Include="src\Helper\CommonHelper.cpp" />
<ClCompile Include="src\Helper\Concurrent.cpp" />
<ClCompile Include="src\Helper\SimpleIniReader.cpp" />
<ClCompile Include="src\Helper\VectorSetReader.cpp" />
<ClCompile Include="src\Helper\VectorSetReaders\DefaultReader.cpp" />
</ItemGroup>
<ItemGroup>
<None Include="packages.config" />
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
<Import Project="..\packages\tbb_oss.9.107.0.0\build\native\tbb_oss.targets" Condition="Exists('..\packages\tbb_oss.9.107.0.0\build\native\tbb_oss.targets')" />
</ImportGroup>
<Target Name="EnsureNuGetPackageBuildImports" BeforeTargets="PrepareForBuild">
<PropertyGroup>
<ErrorText>This project references NuGet package(s) that are missing on this computer. Use NuGet Package Restore to download them. For more information, see http://go.microsoft.com/fwlink/?LinkID=322105. The missing file is {0}.</ErrorText>
</PropertyGroup>
<Error Condition="!Exists('..\packages\tbb_oss.9.107.0.0\build\native\tbb_oss.targets')" Text="$([System.String]::Format('$(ErrorText)', '..\packages\tbb_oss.9.107.0.0\build\native\tbb_oss.targets'))" />
</Target>
</Project>

View File

@ -38,6 +38,12 @@
<Filter Include="Source Files\Core\KDT">
<UniqueIdentifier>{8fb36afb-73ed-4c3d-8c9b-c3581d80c5d1}</UniqueIdentifier>
</Filter>
<Filter Include="Header Files\Helper\VectorSetReaders">
<UniqueIdentifier>{f7bc0bc7-1af5-4870-b8ee-fabdbabdb4c4}</UniqueIdentifier>
</Filter>
<Filter Include="Source Files\Helper\VectorSetReaders">
<UniqueIdentifier>{5c1449e0-38b7-4c82-976e-cbdc488d3fb5}</UniqueIdentifier>
</Filter>
</ItemGroup>
<ItemGroup>
<ClInclude Include="inc\Core\Common.h">
@ -52,6 +58,9 @@
<ClInclude Include="inc\Core\SearchQuery.h">
<Filter>Header Files\Core</Filter>
</ClInclude>
<ClInclude Include="inc\Core\SearchResult.h">
<Filter>Header Files\Core</Filter>
</ClInclude>
<ClInclude Include="inc\Core\VectorIndex.h">
<Filter>Header Files\Core</Filter>
</ClInclude>
@ -130,11 +139,20 @@
<ClInclude Include="inc\Core\Common\BKTree.h">
<Filter>Header Files\Core\Common</Filter>
</ClInclude>
<ClInclude Include="inc\Helper\ConcurrentSet.h">
<Filter>Header Files\Helper</Filter>
</ClInclude>
<ClInclude Include="inc\Helper\BufferStream.h">
<Filter>Header Files\Helper</Filter>
</ClInclude>
<ClInclude Include="inc\Helper\VectorSetReaders\DefaultReader.h">
<Filter>Header Files\Helper\VectorSetReaders</Filter>
</ClInclude>
<ClInclude Include="inc\Helper\VectorSetReader.h">
<Filter>Header Files\Helper</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<ClCompile Include="src\Core\CommonDataStructure.cpp">
<Filter>Source Files\Core</Filter>
</ClCompile>
<ClCompile Include="src\Core\VectorIndex.cpp">
<Filter>Source Files\Core</Filter>
</ClCompile>
@ -171,6 +189,12 @@
<ClCompile Include="src\Core\Common\NeighborhoodGraph.cpp">
<Filter>Source Files\Core\Common</Filter>
</ClCompile>
<ClCompile Include="src\Helper\VectorSetReaders\DefaultReader.cpp">
<Filter>Source Files\Helper\VectorSetReaders</Filter>
</ClCompile>
<ClCompile Include="src\Helper\VectorSetReader.cpp">
<Filter>Source Files\Helper</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<None Include="packages.config" />

View File

@ -139,15 +139,11 @@
<ItemGroup>
<ClInclude Include="inc\IndexBuilder\Options.h" />
<ClInclude Include="inc\IndexBuilder\ThreadPool.h" />
<ClInclude Include="inc\IndexBuilder\VectorSetReader.h" />
<ClInclude Include="inc\IndexBuilder\VectorSetReaders\DefaultReader.h" />
</ItemGroup>
<ItemGroup>
<ClCompile Include="src\IndexBuilder\main.cpp" />
<ClCompile Include="src\IndexBuilder\Options.cpp" />
<ClCompile Include="src\IndexBuilder\ThreadPool.cpp" />
<ClCompile Include="src\IndexBuilder\VectorSetReader.cpp" />
<ClCompile Include="src\IndexBuilder\VectorSetReaders\DefaultReader.cpp" />
</ItemGroup>
<ItemGroup>
<None Include="packages.config" />
@ -161,7 +157,6 @@
<Import Project="..\packages\boost_system-vc140.1.67.0.0\build\boost_system-vc140.targets" Condition="Exists('..\packages\boost_system-vc140.1.67.0.0\build\boost_system-vc140.targets')" />
<Import Project="..\packages\boost_thread-vc140.1.67.0.0\build\boost_thread-vc140.targets" Condition="Exists('..\packages\boost_thread-vc140.1.67.0.0\build\boost_thread-vc140.targets')" />
<Import Project="..\packages\boost_wserialization-vc140.1.67.0.0\build\boost_wserialization-vc140.targets" Condition="Exists('..\packages\boost_wserialization-vc140.1.67.0.0\build\boost_wserialization-vc140.targets')" />
<Import Project="..\packages\tbb_oss.9.107.0.0\build\native\tbb_oss.targets" Condition="Exists('..\packages\tbb_oss.9.107.0.0\build\native\tbb_oss.targets')" />
</ImportGroup>
<Target Name="EnsureNuGetPackageBuildImports" BeforeTargets="PrepareForBuild">
<PropertyGroup>
@ -174,6 +169,5 @@
<Error Condition="!Exists('..\packages\boost_system-vc140.1.67.0.0\build\boost_system-vc140.targets')" Text="$([System.String]::Format('$(ErrorText)', '..\packages\boost_system-vc140.1.67.0.0\build\boost_system-vc140.targets'))" />
<Error Condition="!Exists('..\packages\boost_thread-vc140.1.67.0.0\build\boost_thread-vc140.targets')" Text="$([System.String]::Format('$(ErrorText)', '..\packages\boost_thread-vc140.1.67.0.0\build\boost_thread-vc140.targets'))" />
<Error Condition="!Exists('..\packages\boost_wserialization-vc140.1.67.0.0\build\boost_wserialization-vc140.targets')" Text="$([System.String]::Format('$(ErrorText)', '..\packages\boost_wserialization-vc140.1.67.0.0\build\boost_wserialization-vc140.targets'))" />
<Error Condition="!Exists('..\packages\tbb_oss.9.107.0.0\build\native\tbb_oss.targets')" Text="$([System.String]::Format('$(ErrorText)', '..\packages\tbb_oss.9.107.0.0\build\native\tbb_oss.targets'))" />
</Target>
</Project>

View File

@ -1,4 +1,4 @@
<?xml version="1.0" encoding="utf-8"?>
<?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup>
<Filter Include="Source Files">
@ -9,12 +9,6 @@
<UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
<Extensions>h;hh;hpp;hxx;hm;inl;inc;xsd</Extensions>
</Filter>
<Filter Include="Header Files\VectorSetReaders">
<UniqueIdentifier>{cf68b421-6a65-44f2-bf43-438b13940d7d}</UniqueIdentifier>
</Filter>
<Filter Include="Source Files\VectorSetReaders">
<UniqueIdentifier>{41ac91f9-6b6d-4341-8791-12f672d6ad5c}</UniqueIdentifier>
</Filter>
</ItemGroup>
<ItemGroup>
<ClInclude Include="inc\IndexBuilder\Options.h">
@ -23,28 +17,16 @@
<ClInclude Include="inc\IndexBuilder\ThreadPool.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="inc\IndexBuilder\VectorSetReader.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="inc\IndexBuilder\VectorSetReaders\DefaultReader.h">
<Filter>Header Files\VectorSetReaders</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<ClCompile Include="src\IndexBuilder\Options.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="src\IndexBuilder\ThreadPool.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="src\IndexBuilder\VectorSetReader.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="src\IndexBuilder\VectorSetReaders\DefaultReader.cpp">
<Filter>Source Files\VectorSetReaders</Filter>
</ClCompile>
<ClCompile Include="src\IndexBuilder\main.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="src\IndexBuilder\ThreadPool.cpp">
<Filter>Source Files</Filter>
</ClCompile>
</ItemGroup>
</Project>

View File

@ -154,7 +154,6 @@
<Import Project="..\packages\boost_system-vc140.1.67.0.0\build\boost_system-vc140.targets" Condition="Exists('..\packages\boost_system-vc140.1.67.0.0\build\boost_system-vc140.targets')" />
<Import Project="..\packages\boost_thread-vc140.1.67.0.0\build\boost_thread-vc140.targets" Condition="Exists('..\packages\boost_thread-vc140.1.67.0.0\build\boost_thread-vc140.targets')" />
<Import Project="..\packages\boost_wserialization-vc140.1.67.0.0\build\boost_wserialization-vc140.targets" Condition="Exists('..\packages\boost_wserialization-vc140.1.67.0.0\build\boost_wserialization-vc140.targets')" />
<Import Project="..\packages\tbb_oss.9.107.0.0\build\native\tbb_oss.targets" Condition="Exists('..\packages\tbb_oss.9.107.0.0\build\native\tbb_oss.targets')" />
</ImportGroup>
<Target Name="EnsureNuGetPackageBuildImports" BeforeTargets="PrepareForBuild">
<PropertyGroup>
@ -167,6 +166,5 @@
<Error Condition="!Exists('..\packages\boost_system-vc140.1.67.0.0\build\boost_system-vc140.targets')" Text="$([System.String]::Format('$(ErrorText)', '..\packages\boost_system-vc140.1.67.0.0\build\boost_system-vc140.targets'))" />
<Error Condition="!Exists('..\packages\boost_thread-vc140.1.67.0.0\build\boost_thread-vc140.targets')" Text="$([System.String]::Format('$(ErrorText)', '..\packages\boost_thread-vc140.1.67.0.0\build\boost_thread-vc140.targets'))" />
<Error Condition="!Exists('..\packages\boost_wserialization-vc140.1.67.0.0\build\boost_wserialization-vc140.targets')" Text="$([System.String]::Format('$(ErrorText)', '..\packages\boost_wserialization-vc140.1.67.0.0\build\boost_wserialization-vc140.targets'))" />
<Error Condition="!Exists('..\packages\tbb_oss.9.107.0.0\build\native\tbb_oss.targets')" Text="$([System.String]::Format('$(ErrorText)', '..\packages\tbb_oss.9.107.0.0\build\native\tbb_oss.targets'))" />
</Target>
</Project>

View File

@ -137,7 +137,6 @@
<Import Project="..\packages\boost_system-vc140.1.67.0.0\build\boost_system-vc140.targets" Condition="Exists('..\packages\boost_system-vc140.1.67.0.0\build\boost_system-vc140.targets')" />
<Import Project="..\packages\boost_thread-vc140.1.67.0.0\build\boost_thread-vc140.targets" Condition="Exists('..\packages\boost_thread-vc140.1.67.0.0\build\boost_thread-vc140.targets')" />
<Import Project="..\packages\boost_wserialization-vc140.1.67.0.0\build\boost_wserialization-vc140.targets" Condition="Exists('..\packages\boost_wserialization-vc140.1.67.0.0\build\boost_wserialization-vc140.targets')" />
<Import Project="..\packages\tbb_oss.9.107.0.0\build\native\tbb_oss.targets" Condition="Exists('..\packages\tbb_oss.9.107.0.0\build\native\tbb_oss.targets')" />
</ImportGroup>
<Target Name="EnsureNuGetPackageBuildImports" BeforeTargets="PrepareForBuild">
<PropertyGroup>
@ -150,6 +149,5 @@
<Error Condition="!Exists('..\packages\boost_system-vc140.1.67.0.0\build\boost_system-vc140.targets')" Text="$([System.String]::Format('$(ErrorText)', '..\packages\boost_system-vc140.1.67.0.0\build\boost_system-vc140.targets'))" />
<Error Condition="!Exists('..\packages\boost_thread-vc140.1.67.0.0\build\boost_thread-vc140.targets')" Text="$([System.String]::Format('$(ErrorText)', '..\packages\boost_thread-vc140.1.67.0.0\build\boost_thread-vc140.targets'))" />
<Error Condition="!Exists('..\packages\boost_wserialization-vc140.1.67.0.0\build\boost_wserialization-vc140.targets')" Text="$([System.String]::Format('$(ErrorText)', '..\packages\boost_wserialization-vc140.1.67.0.0\build\boost_wserialization-vc140.targets'))" />
<Error Condition="!Exists('..\packages\tbb_oss.9.107.0.0\build\native\tbb_oss.targets')" Text="$([System.String]::Format('$(ErrorText)', '..\packages\tbb_oss.9.107.0.0\build\native\tbb_oss.targets'))" />
</Target>
</Project>

View File

@ -15,12 +15,12 @@
#include "../Common/WorkSpacePool.h"
#include "../Common/RelativeNeighborhoodGraph.h"
#include "../Common/BKTree.h"
#include "inc/Helper/ConcurrentSet.h"
#include "inc/Helper/SimpleIniReader.h"
#include "inc/Helper/StringConvert.h"
#include <functional>
#include <mutex>
#include <tbb/concurrent_unordered_set.h>
namespace SPTAG
{
@ -48,35 +48,38 @@ namespace SPTAG
std::string m_sBKTFilename;
std::string m_sGraphFilename;
std::string m_sDataPointsFilename;
std::string m_sDeleteDataPointsFilename;
std::mutex m_dataLock; // protect data and graph
tbb::concurrent_unordered_set<int> m_deletedID;
std::mutex m_dataAddLock; // protect data and graph
Helper::Concurrent::ConcurrentSet<SizeType> m_deletedID;
float m_fDeletePercentageForRefine;
std::unique_ptr<COMMON::WorkSpacePool> m_workSpacePool;
int m_iNumberOfThreads;
DistCalcMethod m_iDistCalcMethod;
float(*m_fComputeDistance)(const T* pX, const T* pY, int length);
float(*m_fComputeDistance)(const T* pX, const T* pY, DimensionType length);
int m_iMaxCheck;
int m_iThresholdOfNumberOfContinuousNoBetterPropagation;
int m_iNumberOfInitialDynamicPivots;
int m_iNumberOfOtherDynamicPivots;
public:
Index()
{
Index()
{
#define DefineBKTParameter(VarName, VarType, DefaultValue, RepresentStr) \
VarName = DefaultValue; \
#include "inc/Core/BKT/ParameterDefinitionList.h"
#undef DefineBKTParameter
m_fComputeDistance = COMMON::DistanceCalcSelector<T>(m_iDistCalcMethod);
}
m_pSamples.SetName("Vector");
m_fComputeDistance = COMMON::DistanceCalcSelector<T>(m_iDistCalcMethod);
}
~Index() {}
inline int GetNumSamples() const { return m_pSamples.R(); }
inline int GetFeatureDim() const { return m_pSamples.C(); }
inline SizeType GetNumSamples() const { return m_pSamples.R(); }
inline DimensionType GetFeatureDim() const { return m_pSamples.C(); }
inline int GetCurrMaxCheck() const { return m_iMaxCheck; }
inline int GetNumThreads() const { return m_iNumberOfThreads; }
@ -85,25 +88,41 @@ namespace SPTAG
inline VectorValueType GetVectorValueType() const { return GetEnumValueType<T>(); }
inline float ComputeDistance(const void* pX, const void* pY) const { return m_fComputeDistance((const T*)pX, (const T*)pY, m_pSamples.C()); }
inline const void* GetSample(const int idx) const { return (void*)m_pSamples[idx]; }
inline const void* GetSample(const SizeType idx) const { return (void*)m_pSamples[idx]; }
inline bool ContainSample(const SizeType idx) const { return !m_deletedID.contains(idx); }
inline bool NeedRefine() const { return m_deletedID.size() >= (size_t)(GetNumSamples() * m_fDeletePercentageForRefine); }
// Collects the buffer sizes reported by the index components, in this order:
// samples, trees, graph, deleted-ID set.
std::shared_ptr<std::vector<std::uint64_t>> BufferSize() const
{
    auto buffersize = std::make_shared<std::vector<std::uint64_t>>();
    buffersize->push_back(m_pSamples.BufferSize());
    buffersize->push_back(m_pTrees.BufferSize());
    buffersize->push_back(m_pGraph.BufferSize());
    buffersize->push_back(m_deletedID.bufferSize());
    // Return the local by name: wrapping it in std::move would block copy elision.
    return buffersize;
}
ErrorCode BuildIndex(const void* p_data, int p_vectorNum, int p_dimension);
ErrorCode SaveConfig(std::ostream& p_configout) const;
ErrorCode SaveIndexData(const std::string& p_folderPath);
ErrorCode SaveIndexData(const std::vector<std::ostream*>& p_indexStreams);
ErrorCode SaveIndexToMemory(std::vector<void*>& p_indexBlobs, std::vector<int64_t>& p_indexBlobsLen);
ErrorCode LoadIndexFromMemory(const std::vector<void*>& p_indexBlobs);
ErrorCode LoadConfig(Helper::IniReader& p_reader);
ErrorCode LoadIndexData(const std::string& p_folderPath);
ErrorCode LoadIndexDataFromMemory(const std::vector<ByteArray>& p_indexBlobs);
ErrorCode SaveIndex(const std::string& p_folderPath, std::ofstream& p_configout);
ErrorCode LoadIndex(const std::string& p_folderPath, Helper::IniReader& p_reader);
ErrorCode BuildIndex(const void* p_data, SizeType p_vectorNum, DimensionType p_dimension);
ErrorCode SearchIndex(QueryResult &p_query) const;
ErrorCode AddIndex(const void* p_vectors, int p_vectorNum, int p_dimension);
ErrorCode DeleteIndex(const void* p_vectors, int p_vectorNum);
ErrorCode AddIndex(const void* p_vectors, SizeType p_vectorNum, DimensionType p_dimension, SizeType* p_start = nullptr);
ErrorCode DeleteIndex(const void* p_vectors, SizeType p_vectorNum);
ErrorCode DeleteIndex(const SizeType& p_id);
ErrorCode SetParameter(const char* p_param, const char* p_value);
std::string GetParameter(const char* p_param) const;
private:
ErrorCode RefineIndex(const std::string& p_folderPath);
void SearchIndexWithDeleted(COMMON::QueryResultSet<T> &p_query, COMMON::WorkSpace &p_space, const tbb::concurrent_unordered_set<int> &p_deleted) const;
ErrorCode RefineIndex(const std::vector<std::ostream*>& p_indexStreams);
private:
void SearchIndexWithDeleted(COMMON::QueryResultSet<T> &p_query, COMMON::WorkSpace &p_space, const Helper::Concurrent::ConcurrentSet<SizeType> &p_deleted) const;
void SearchIndexWithoutDeleted(COMMON::QueryResultSet<T> &p_query, COMMON::WorkSpace &p_space) const;
};
} // namespace BKT

View File

@ -7,6 +7,7 @@
DefineBKTParameter(m_sBKTFilename, std::string, std::string("tree.bin"), "TreeFilePath")
DefineBKTParameter(m_sGraphFilename, std::string, std::string("graph.bin"), "GraphFilePath")
DefineBKTParameter(m_sDataPointsFilename, std::string, std::string("vectors.bin"), "VectorFilePath")
DefineBKTParameter(m_sDeleteDataPointsFilename, std::string, std::string("deletes.bin"), "DeleteVectorFilePath")
DefineBKTParameter(m_pTrees.m_iTreeNumber, int, 1L, "BKTNumber")
DefineBKTParameter(m_pTrees.m_iBKTKmeansK, int, 32L, "BKTKmeansK")
@ -14,11 +15,11 @@ DefineBKTParameter(m_pTrees.m_iBKTLeafSize, int, 8L, "BKTLeafSize")
DefineBKTParameter(m_pTrees.m_iSamples, int, 1000L, "Samples")
DefineBKTParameter(m_pGraph.m_iTPTNumber, int, 32L, "TpTreeNumber")
DefineBKTParameter(m_pGraph.m_iTPTNumber, int, 32L, "TPTNumber")
DefineBKTParameter(m_pGraph.m_iTPTLeafSize, int, 2000L, "TPTLeafSize")
DefineBKTParameter(m_pGraph.m_numTopDimensionTPTSplit, int, 5L, "NumTopDimensionTpTreeSplit")
DefineBKTParameter(m_pGraph.m_iNeighborhoodSize, int, 32L, "NeighborhoodSize")
DefineBKTParameter(m_pGraph.m_iNeighborhoodSize, DimensionType, 32L, "NeighborhoodSize")
DefineBKTParameter(m_pGraph.m_iNeighborhoodScale, int, 2L, "GraphNeighborhoodScale")
DefineBKTParameter(m_pGraph.m_iCEFScale, int, 2L, "GraphCEFScale")
DefineBKTParameter(m_pGraph.m_iRefineIter, int, 0L, "RefineIterations")
@ -28,6 +29,7 @@ DefineBKTParameter(m_pGraph.m_iMaxCheckForRefineGraph, int, 10000L, "MaxCheckFor
DefineBKTParameter(m_iNumberOfThreads, int, 1L, "NumberOfThreads")
DefineBKTParameter(m_iDistCalcMethod, SPTAG::DistCalcMethod, SPTAG::DistCalcMethod::Cosine, "DistCalcMethod")
DefineBKTParameter(m_fDeletePercentageForRefine, float, 0.4F, "DeletePercentageForRefine")
DefineBKTParameter(m_iMaxCheck, int, 8192L, "MaxCheck")
DefineBKTParameter(m_iThresholdOfNumberOfContinuousNoBetterPropagation, int, 3L, "ThresholdOfNumberOfContinuousNoBetterPropagation")
DefineBKTParameter(m_iNumberOfInitialDynamicPivots, int, 50L, "NumberOfInitialDynamicPivots")

View File

@ -56,9 +56,10 @@ inline bool fileexists(const char* path) {
namespace SPTAG
{
typedef std::int32_t SizeType;
typedef std::int32_t DimensionType;
typedef std::uint32_t SizeType;
const SizeType MaxSize = (std::numeric_limits<SizeType>::max)();
const float MinDist = (std::numeric_limits<float>::min)();
const float MaxDist = (std::numeric_limits<float>::max)();
const float Epsilon = 0.000000001f;
@ -76,11 +77,6 @@ public:
#endif
};
// Type of number index.
typedef std::int32_t IndexType;
static_assert(std::is_integral<IndexType>::value, "IndexType must be integral type.");
enum class ErrorCode : std::uint16_t
{
#define DefineErrorCode(Name, Value) Name = Value,

View File

@ -24,34 +24,34 @@ namespace SPTAG
// node type for storing BKT
struct BKTNode
{
// Sample id held by this node. BuildTrees creates each tree root with the
// number of indexed samples instead of a sample id.
int centerid;
// Position in the node array of this node's first child. BuildTrees negates
// the value when clustering yields at most one cluster.
int childStart;
// Node-array size recorded after the children were appended, i.e. one past
// the last child.
int childEnd;
SizeType centerid;
SizeType childStart;
SizeType childEnd;
// childStart and childEnd start at -1 until BuildTrees assigns them.
BKTNode(int cid = -1) : centerid(cid), childStart(-1), childEnd(-1) {}
BKTNode(SizeType cid = -1) : centerid(cid), childStart(-1), childEnd(-1) {}
};
template <typename T>
struct KmeansArgs {
int _K;
int _D;
DimensionType _D;
int _T;
T* centers;
int* counts;
SizeType* counts;
float* newCenters;
int* newCounts;
char* label;
int* clusterIdx;
SizeType* newCounts;
int* label;
SizeType* clusterIdx;
float* clusterDist;
T* newTCenters;
KmeansArgs(int k, int dim, int datasize, int threadnum) : _K(k), _D(dim), _T(threadnum) {
KmeansArgs(int k, DimensionType dim, SizeType datasize, int threadnum) : _K(k), _D(dim), _T(threadnum) {
centers = new T[k * dim];
counts = new int[k];
counts = new SizeType[k];
newCenters = new float[threadnum * k * dim];
newCounts = new int[threadnum * k];
label = new char[datasize];
clusterIdx = new int[threadnum * k];
newCounts = new SizeType[threadnum * k];
label = new int[datasize];
clusterIdx = new SizeType[threadnum * k];
clusterDist = new float[threadnum * k];
newTCenters = new T[k * dim];
}
@ -68,7 +68,7 @@ namespace SPTAG
}
inline void ClearCounts() {
memset(newCounts, 0, sizeof(int) * _T * _K);
memset(newCounts, 0, sizeof(SizeType) * _T * _K);
}
inline void ClearCenters() {
@ -82,17 +82,17 @@ namespace SPTAG
}
}
void Shuffle(std::vector<int>& indices, int first, int last) {
int* pos = new int[_K];
void Shuffle(std::vector<SizeType>& indices, SizeType first, SizeType last) {
SizeType* pos = new SizeType[_K];
pos[0] = first;
for (int k = 1; k < _K; k++) pos[k] = pos[k - 1] + newCounts[k - 1];
for (int k = 0; k < _K; k++) {
if (newCounts[k] == 0) continue;
int i = pos[k];
SizeType i = pos[k];
while (newCounts[k] > 0) {
int swapid = pos[(int)(label[i])] + newCounts[(int)(label[i])] - 1;
newCounts[(int)(label[i])]--;
SizeType swapid = pos[label[i]] + newCounts[label[i]] - 1;
newCounts[label[i]]--;
std::swap(indices[i], indices[swapid]);
std::swap(label[i], label[swapid]);
}
@ -114,59 +114,59 @@ namespace SPTAG
m_iSamples(other.m_iSamples) {}
~BKTree() {}
inline const BKTNode& operator[](int index) const { return m_pTreeRoots[index]; }
inline BKTNode& operator[](int index) { return m_pTreeRoots[index]; }
inline const BKTNode& operator[](SizeType index) const { return m_pTreeRoots[index]; }
inline BKTNode& operator[](SizeType index) { return m_pTreeRoots[index]; }
inline int size() const { return (int)m_pTreeRoots.size(); }
inline SizeType size() const { return (SizeType)m_pTreeRoots.size(); }
inline const std::unordered_map<int, int>& GetSampleMap() const { return m_pSampleCenterMap; }
inline const std::unordered_map<SizeType, SizeType>& GetSampleMap() const { return m_pSampleCenterMap; }
template <typename T>
void BuildTrees(VectorIndex* index, std::vector<int>* indices = nullptr)
void BuildTrees(VectorIndex* index, std::vector<SizeType>* indices = nullptr)
{
struct BKTStackItem {
int index, first, last;
BKTStackItem(int index_, int first_, int last_) : index(index_), first(first_), last(last_) {}
SizeType index, first, last;
BKTStackItem(SizeType index_, SizeType first_, SizeType last_) : index(index_), first(first_), last(last_) {}
};
std::stack<BKTStackItem> ss;
std::vector<int> localindices;
std::vector<SizeType> localindices;
if (indices == nullptr) {
localindices.resize(index->GetNumSamples());
for (int i = 0; i < index->GetNumSamples(); i++) localindices[i] = i;
for (SizeType i = 0; i < index->GetNumSamples(); i++) localindices[i] = i;
}
else {
localindices.assign(indices->begin(), indices->end());
}
KmeansArgs<T> args(m_iBKTKmeansK, index->GetFeatureDim(), (int)localindices.size(), omp_get_num_threads());
KmeansArgs<T> args(m_iBKTKmeansK, index->GetFeatureDim(), (SizeType)localindices.size(), omp_get_num_threads());
m_pSampleCenterMap.clear();
for (char i = 0; i < m_iTreeNumber; i++)
{
std::random_shuffle(localindices.begin(), localindices.end());
m_pTreeStart.push_back((int)m_pTreeRoots.size());
m_pTreeRoots.push_back(BKTNode((int)localindices.size()));
m_pTreeStart.push_back((SizeType)m_pTreeRoots.size());
m_pTreeRoots.push_back(BKTNode((SizeType)localindices.size()));
std::cout << "Start to build BKTree " << i + 1 << std::endl;
ss.push(BKTStackItem(m_pTreeStart[i], 0, (int)localindices.size()));
ss.push(BKTStackItem(m_pTreeStart[i], 0, (SizeType)localindices.size()));
while (!ss.empty()) {
BKTStackItem item = ss.top(); ss.pop();
int newBKTid = (int)m_pTreeRoots.size();
SizeType newBKTid = (SizeType)m_pTreeRoots.size();
m_pTreeRoots[item.index].childStart = newBKTid;
if (item.last - item.first <= m_iBKTLeafSize) {
for (int j = item.first; j < item.last; j++) {
for (SizeType j = item.first; j < item.last; j++) {
m_pTreeRoots.push_back(BKTNode(localindices[j]));
}
}
else { // clustering the data into BKTKmeansK clusters
int numClusters = KmeansClustering(index, localindices, item.first, item.last, args);
if (numClusters <= 1) {
int end = min(item.last + 1, (int)localindices.size());
SizeType end = min(item.last + 1, (SizeType)localindices.size());
std::sort(localindices.begin() + item.first, localindices.begin() + end);
m_pTreeRoots[item.index].centerid = localindices[item.first];
m_pTreeRoots[item.index].childStart = -m_pTreeRoots[item.index].childStart;
for (int j = item.first + 1; j < end; j++) {
for (SizeType j = item.first + 1; j < end; j++) {
m_pTreeRoots.push_back(BKTNode(localindices[j]));
m_pSampleCenterMap[localindices[j]] = m_pTreeRoots[item.index].centerid;
}
@ -181,53 +181,36 @@ namespace SPTAG
}
}
}
m_pTreeRoots[item.index].childEnd = (int)m_pTreeRoots.size();
m_pTreeRoots[item.index].childEnd = (SizeType)m_pTreeRoots.size();
}
std::cout << i + 1 << " BKTree built, " << m_pTreeRoots.size() - m_pTreeStart[i] << " " << localindices.size() << std::endl;
}
}
// NOTE(review): rendered diff without +/- markers. The lines below interleave what looks like the
// removed in-memory serializer SaveTrees(void**, int64_t&) with the added BufferSize() that
// replaces it (only the final two-line `return` belongs to BufferSize) - confirm against the repo.
//
// Both describe the same byte layout:
//   [tree count][m_iTreeNumber tree-start indices][node count][node count * BKTNode]
// The removed function malloc()s a buffer of that size, fills it, and hands it out through
// *pKDTMemFile / len; it does not free it, so presumably the caller owns the buffer - verify.
// BufferSize() only reports the number of bytes that layout occupies with SizeType-sized fields.
bool SaveTrees(void **pKDTMemFile, int64_t &len) const
inline std::uint64_t BufferSize() const
{
int treeNodeSize = (int)m_pTreeRoots.size();
// header int + one int per tree start + node-count int + raw node array
size_t size = sizeof(int) +
sizeof(int) * m_iTreeNumber +
sizeof(int) +
sizeof(BKTNode) * treeNodeSize;
char *mem = (char*)malloc(size);
if (mem == NULL) return false;
// ptr is the write cursor into mem; it advances past each field after it is stored.
auto ptr = mem;
*(int*)ptr = m_iTreeNumber;
ptr += sizeof(int);
memcpy(ptr, m_pTreeStart.data(), sizeof(int) * m_iTreeNumber);
ptr += sizeof(int) * m_iTreeNumber;
*(int*)ptr = treeNodeSize;
ptr += sizeof(int);
memcpy(ptr, m_pTreeRoots.data(), sizeof(BKTNode) * treeNodeSize);
*pKDTMemFile = mem;
len = size;
return sizeof(int) + sizeof(SizeType) * m_iTreeNumber +
sizeof(SizeType) + sizeof(BKTNode) * m_pTreeRoots.size();
}
bool SaveTrees(std::ostream& p_outstream) const
{
p_outstream.write((char*)&m_iTreeNumber, sizeof(int));
p_outstream.write((char*)m_pTreeStart.data(), sizeof(SizeType) * m_iTreeNumber);
SizeType treeNodeSize = (SizeType)m_pTreeRoots.size();
p_outstream.write((char*)&treeNodeSize, sizeof(SizeType));
p_outstream.write((char*)m_pTreeRoots.data(), sizeof(BKTNode) * treeNodeSize);
std::cout << "Save BKT (" << m_iTreeNumber << "," << treeNodeSize << ") Finish!" << std::endl;
return true;
}
// Writes the forest to the file sTreeFileName; returns false if the file cannot be opened.
// NOTE(review): rendered diff without +/- markers. The FILE*/fwrite statements look like the removed
// implementation and the std::ofstream statements like its replacement, which delegates the
// actual serialization to SaveTrees(std::ostream&) - confirm against the repo.
bool SaveTrees(std::string sTreeFileName) const
{
std::cout << "Save BKT to " << sTreeFileName << std::endl;
FILE *fp = fopen(sTreeFileName.c_str(), "wb");
if (fp == NULL) return false;
// On-disk order: tree count, tree-start indices, node count, raw BKTNode array.
fwrite(&m_iTreeNumber, sizeof(int), 1, fp);
fwrite(m_pTreeStart.data(), sizeof(int), m_iTreeNumber, fp);
int treeNodeSize = (int)m_pTreeRoots.size();
fwrite(&treeNodeSize, sizeof(int), 1, fp);
fwrite(m_pTreeRoots.data(), sizeof(BKTNode), treeNodeSize, fp);
fclose(fp);
std::cout << "Save BKT (" << m_iTreeNumber << "," << treeNodeSize << ") Finish!" << std::endl;
std::ofstream output(sTreeFileName, std::ios::binary);
if (!output.is_open()) return false;
// NOTE(review): the bool returned by SaveTrees(output) is discarded, so this overload
// reports success whenever the file could be opened.
SaveTrees(output);
output.close();
return true;
}
@ -236,31 +219,32 @@ namespace SPTAG
m_iTreeNumber = *((int*)pBKTMemFile);
pBKTMemFile += sizeof(int);
m_pTreeStart.resize(m_iTreeNumber);
memcpy(m_pTreeStart.data(), pBKTMemFile, sizeof(int) * m_iTreeNumber);
pBKTMemFile += sizeof(int)*m_iTreeNumber;
memcpy(m_pTreeStart.data(), pBKTMemFile, sizeof(SizeType) * m_iTreeNumber);
pBKTMemFile += sizeof(SizeType)*m_iTreeNumber;
int treeNodeSize = *((int*)pBKTMemFile);
pBKTMemFile += sizeof(int);
SizeType treeNodeSize = *((SizeType*)pBKTMemFile);
pBKTMemFile += sizeof(SizeType);
m_pTreeRoots.resize(treeNodeSize);
memcpy(m_pTreeRoots.data(), pBKTMemFile, sizeof(BKTNode) * treeNodeSize);
std::cout << "Load BKT (" << m_iTreeNumber << "," << treeNodeSize << ") Finish!" << std::endl;
return true;
}
// Reads a forest from the file sTreeFileName into m_iTreeNumber, m_pTreeStart and m_pTreeRoots;
// returns false if the file cannot be opened.
// NOTE(review): rendered diff without +/- markers. The FILE*/fread statements look like the removed
// implementation and the std::ifstream statements like its replacement - confirm against the repo.
// Neither variant checks the result of a read, so a truncated file is not detected here.
bool LoadTrees(std::string sTreeFileName)
{
std::cout << "Load BKT From " << sTreeFileName << std::endl;
FILE *fp = fopen(sTreeFileName.c_str(), "rb");
if (fp == NULL) return false;
std::ifstream input(sTreeFileName, std::ios::binary);
if (!input.is_open()) return false;
// Field 1: number of trees, which also sizes the tree-start table read next.
fread(&m_iTreeNumber, sizeof(int), 1, fp);
input.read((char*)&m_iTreeNumber, sizeof(int));
m_pTreeStart.resize(m_iTreeNumber);
fread(m_pTreeStart.data(), sizeof(int), m_iTreeNumber, fp);
input.read((char*)m_pTreeStart.data(), sizeof(SizeType) * m_iTreeNumber);
// Field 3: node count, followed by that many raw BKTNode records.
int treeNodeSize;
fread(&treeNodeSize, sizeof(int), 1, fp);
SizeType treeNodeSize;
input.read((char*)&treeNodeSize, sizeof(SizeType));
m_pTreeRoots.resize(treeNodeSize);
fread(m_pTreeRoots.data(), sizeof(BKTNode), treeNodeSize, fp);
fclose(fp);
input.read((char*)m_pTreeRoots.data(), sizeof(BKTNode) * treeNodeSize);
input.close();
std::cout << "Load BKT (" << m_iTreeNumber << "," << treeNodeSize << ") Finish!" << std::endl;
return true;
}
@ -274,9 +258,9 @@ namespace SPTAG
p_space.m_SPTQueue.insert(COMMON::HeapCell(m_pTreeStart[i], p_index->ComputeDistance((const void*)p_query.GetTarget(), p_index->GetSample(node.centerid))));
}
else {
for (int begin = node.childStart; begin < node.childEnd; begin++) {
int index = m_pTreeRoots[begin].centerid;
p_space.m_SPTQueue.insert(COMMON::HeapCell(begin, p_index->ComputeDistance((const void*)p_query.GetTarget(), p_index->GetSample(index))));
for (SizeType begin = node.childStart; begin < node.childEnd; begin++) {
SizeType index = m_pTreeRoots[begin].centerid;
p_space.m_SPTQueue.insert(COMMON::HeapCell(begin, p_index->ComputeDistance((const void*)p_query.GetTarget(), p_index->GetSample(index))));
}
}
}
@ -301,8 +285,8 @@ namespace SPTAG
if (!p_space.CheckAndSet(tnode.centerid)) {
p_space.m_NGQueue.insert(COMMON::HeapCell(tnode.centerid, bcell.distance));
}
for (int begin = tnode.childStart; begin < tnode.childEnd; begin++) {
int index = m_pTreeRoots[begin].centerid;
for (SizeType begin = tnode.childStart; begin < tnode.childEnd; begin++) {
SizeType index = m_pTreeRoots[begin].centerid;
p_space.m_SPTQueue.insert(COMMON::HeapCell(begin, p_index->ComputeDistance((const void*)p_query.GetTarget(), p_index->GetSample(index))));
}
}
@ -313,24 +297,24 @@ namespace SPTAG
template <typename T>
float KmeansAssign(VectorIndex* p_index,
std::vector<int>& indices,
const int first, const int last, KmeansArgs<T>& args, const bool updateCenters) const {
std::vector<SizeType>& indices,
const SizeType first, const SizeType last, KmeansArgs<T>& args, const bool updateCenters) const {
float currDist = 0;
int threads = omp_get_num_threads();
float lambda = (updateCenters) ? COMMON::Utils::GetBase<T>() * COMMON::Utils::GetBase<T>() / (100.0f * (last - first)) : 0.0f;
int subsize = (last - first - 1) / threads + 1;
SizeType subsize = (last - first - 1) / threads + 1;
#pragma omp parallel for
for (int tid = 0; tid < threads; tid++)
{
int istart = first + tid * subsize;
int iend = min(first + (tid + 1) * subsize, last);
int *inewCounts = args.newCounts + tid * m_iBKTKmeansK;
SizeType istart = first + tid * subsize;
SizeType iend = min(first + (tid + 1) * subsize, last);
SizeType *inewCounts = args.newCounts + tid * m_iBKTKmeansK;
float *inewCenters = args.newCenters + tid * m_iBKTKmeansK * p_index->GetFeatureDim();
int * iclusterIdx = args.clusterIdx + tid * m_iBKTKmeansK;
SizeType * iclusterIdx = args.clusterIdx + tid * m_iBKTKmeansK;
float * iclusterDist = args.clusterDist + tid * m_iBKTKmeansK;
float idist = 0;
for (int i = istart; i < iend; i++) {
for (SizeType i = istart; i < iend; i++) {
int clusterid = 0;
float smallestDist = MaxDist;
for (int k = 0; k < m_iBKTKmeansK; k++) {
@ -345,7 +329,7 @@ namespace SPTAG
if (updateCenters) {
const T* v = (const T*)p_index->GetSample(indices[i]);
float* center = inewCenters + clusterid*p_index->GetFeatureDim();
for (int j = 0; j < p_index->GetFeatureDim(); j++) center[j] += v[j];
for (DimensionType j = 0; j < p_index->GetFeatureDim(); j++) center[j] += v[j];
if (smallestDist > iclusterDist[clusterid]) {
iclusterDist[clusterid] = smallestDist;
iclusterIdx[clusterid] = indices[i];
@ -369,36 +353,50 @@ namespace SPTAG
if (updateCenters) {
for (int i = 1; i < threads; i++) {
float* currCenter = args.newCenters + i*m_iBKTKmeansK*p_index->GetFeatureDim();
for (int j = 0; j < m_iBKTKmeansK * p_index->GetFeatureDim(); j++) args.newCenters[j] += currCenter[j];
for (size_t j = 0; j < ((size_t)m_iBKTKmeansK) * p_index->GetFeatureDim(); j++) args.newCenters[j] += currCenter[j];
for (int k = 0; k < m_iBKTKmeansK; k++) {
if (args.clusterIdx[i*m_iBKTKmeansK + k] != -1 && args.clusterDist[i*m_iBKTKmeansK + k] > args.clusterDist[k]) {
args.clusterDist[k] = args.clusterDist[i*m_iBKTKmeansK + k];
args.clusterIdx[k] = args.clusterIdx[i*m_iBKTKmeansK + k];
}
}
}
int maxcluster = 0;
for (int k = 1; k < m_iBKTKmeansK; k++) if (args.newCounts[maxcluster] < args.newCounts[k]) maxcluster = k;
int maxid = maxcluster;
for (int tid = 1; tid < threads; tid++) {
if (args.clusterDist[maxid] < args.clusterDist[tid * m_iBKTKmeansK + maxcluster]) maxid = tid * m_iBKTKmeansK + maxcluster;
int maxcluster = -1;
SizeType maxCount = 0;
for (int k = 0; k < m_iBKTKmeansK; k++) {
if (args.newCounts[k] > maxCount && DistanceUtils::ComputeL2Distance((T*)p_index->GetSample(args.clusterIdx[k]), args.centers + k * p_index->GetFeatureDim(), p_index->GetFeatureDim()) > 1e-6)
{
maxcluster = k;
maxCount = args.newCounts[k];
}
}
if (args.clusterIdx[maxid] < 0 || args.clusterIdx[maxid] >= p_index->GetNumSamples())
std::cout << "first:" << first << " last:" << last << " maxcluster:" << maxcluster << "(" << args.newCounts[maxcluster] << ") Error maxid:" << maxid << " dist:" << args.clusterDist[maxid] << std::endl;
maxid = args.clusterIdx[maxid];
if (maxcluster != -1 && (args.clusterIdx[maxcluster] < 0 || args.clusterIdx[maxcluster] >= p_index->GetNumSamples()))
std::cout << "first:" << first << " last:" << last << " maxcluster:" << maxcluster << "(" << args.newCounts[maxcluster] << ") Error dist:" << args.clusterDist[maxcluster] << std::endl;
for (int k = 0; k < m_iBKTKmeansK; k++) {
T* TCenter = args.newTCenters + k * p_index->GetFeatureDim();
if (args.newCounts[k] == 0) {
//int nextid = Utils::rand_int(last, first);
//while (args.label[nextid] != maxcluster) nextid = Utils::rand_int(last, first);
int nextid = maxid;
std::memcpy(TCenter, p_index->GetSample(nextid), sizeof(T)*p_index->GetFeatureDim());
if (maxcluster != -1) {
//int nextid = Utils::rand_int(last, first);
//while (args.label[nextid] != maxcluster) nextid = Utils::rand_int(last, first);
SizeType nextid = args.clusterIdx[maxcluster];
std::memcpy(TCenter, p_index->GetSample(nextid), sizeof(T)*p_index->GetFeatureDim());
}
else {
std::memcpy(TCenter, args.centers + k * p_index->GetFeatureDim(), sizeof(T)*p_index->GetFeatureDim());
}
}
else {
float* currCenters = args.newCenters + k * p_index->GetFeatureDim();
for (int j = 0; j < p_index->GetFeatureDim(); j++) currCenters[j] /= args.newCounts[k];
for (DimensionType j = 0; j < p_index->GetFeatureDim(); j++) currCenters[j] /= args.newCounts[k];
if (p_index->GetDistCalcMethod() == DistCalcMethod::Cosine) {
COMMON::Utils::Normalize(currCenters, p_index->GetFeatureDim(), COMMON::Utils::GetBase<T>());
}
for (int j = 0; j < p_index->GetFeatureDim(); j++) TCenter[j] = (T)(currCenters[j]);
for (DimensionType j = 0; j < p_index->GetFeatureDim(); j++) TCenter[j] = (T)(currCenters[j]);
}
}
}
@ -417,14 +415,14 @@ namespace SPTAG
template <typename T>
int KmeansClustering(VectorIndex* p_index,
std::vector<int>& indices, const int first, const int last, KmeansArgs<T>& args) const {
std::vector<SizeType>& indices, const SizeType first, const SizeType last, KmeansArgs<T>& args) const {
int iterLimit = 100;
int batchEnd = min(first + m_iSamples, last);
SizeType batchEnd = min(first + m_iSamples, last);
float currDiff, currDist, minClusterDist = MaxDist;
for (int numKmeans = 0; numKmeans < 3; numKmeans++) {
for (int k = 0; k < m_iBKTKmeansK; k++) {
int randid = COMMON::Utils::rand_int(last, first);
SizeType randid = COMMON::Utils::rand(last, first);
std::memcpy(args.centers + k*p_index->GetFeatureDim(), p_index->GetSample(indices[randid]), sizeof(T)*p_index->GetFeatureDim());
}
args.ClearCounts();
@ -432,7 +430,7 @@ namespace SPTAG
if (currDist < minClusterDist) {
minClusterDist = currDist;
memcpy(args.newTCenters, args.centers, sizeof(T)*m_iBKTKmeansK*p_index->GetFeatureDim());
memcpy(args.counts, args.newCounts, sizeof(int) * m_iBKTKmeansK);
memcpy(args.counts, args.newCounts, sizeof(SizeType) * m_iBKTKmeansK);
}
}
@ -446,7 +444,7 @@ namespace SPTAG
args.ClearCounts();
args.ClearDists(-MaxDist);
currDist = KmeansAssign(p_index, indices, first, batchEnd, args, true);
memcpy(args.counts, args.newCounts, sizeof(int)*m_iBKTKmeansK);
memcpy(args.counts, args.newCounts, sizeof(SizeType) * m_iBKTKmeansK);
currDiff = 0;
for (int k = 0; k < m_iBKTKmeansK; k++) {
@ -466,7 +464,7 @@ namespace SPTAG
args.ClearCounts();
args.ClearDists(MaxDist);
currDist = KmeansAssign(p_index, indices, first, last, args, false);
memcpy(args.counts, args.newCounts, sizeof(int)*m_iBKTKmeansK);
memcpy(args.counts, args.newCounts, sizeof(SizeType) * m_iBKTKmeansK);
int numClusters = 0;
for (int i = 0; i < m_iBKTKmeansK; i++) if (args.counts[i] > 0) numClusters++;
@ -480,9 +478,9 @@ namespace SPTAG
}
private:
// NOTE(review): rendered diff without +/- markers; the `int` declarations look like the removed
// forms and the `SizeType` declarations their replacements - confirm against the repo.
// Index into m_pTreeRoots of each tree's root node (BuildTrees pushes one entry per tree).
std::vector<int> m_pTreeStart;
std::vector<SizeType> m_pTreeStart;
// Nodes of all trees, stored back to back; children are referenced by index range.
std::vector<BKTNode> m_pTreeRoots;
// Sample id -> centerid of the node it was attached to when clustering yielded <= 1 cluster.
std::unordered_map<int, int> m_pSampleCenterMap;
std::unordered_map<SizeType, SizeType> m_pSampleCenterMap;
public:
// m_iTreeNumber : number of trees built / serialized
// m_iBKTKmeansK : number of k-means clusters requested per split
// m_iBKTLeafSize: ranges of at most this many samples are stored as leaves without clustering
// m_iSamples    : upper bound on the samples used for the k-means batch (batchEnd = first + m_iSamples)
int m_iTreeNumber, m_iBKTKmeansK, m_iBKTLeafSize, m_iSamples;

View File

@ -36,9 +36,9 @@ namespace SPTAG
{
class Utils {
public:
// Returns low + floor((high - low) * u), where u = std::rand() / (RAND_MAX + 1.0) lies in [0, 1),
// i.e. a value that is nominally in [low, high). Note the argument order: `high` comes first.
// NOTE(review): (high - low) goes through `float`, so for very wide ranges the span is rounded
// before scaling - confirm this is acceptable for SizeType ranges near MaxSize.
// NOTE(review): rendered diff without +/- markers; rand_int(int, int) looks like the removed form
// and rand(SizeType, SizeType) its replacement - confirm against the repo.
static int rand_int(int high = RAND_MAX, int low = 0) // Generates a random int value.
static SizeType rand(SizeType high = MaxSize, SizeType low = 0) // Generates a random int value.
{
return low + (int)(float(high - low)*(std::rand() / (RAND_MAX + 1.0)));
return low + (SizeType)(float(high - low)*(std::rand() / (RAND_MAX + 1.0)));
}
static inline float atomic_float_add(volatile float* ptr, const float operand)
@ -61,11 +61,11 @@ namespace SPTAG
}
}
static double GetVector(char* cstr, const char* sep, std::vector<float>& arr, int& NumDim) {
static double GetVector(char* cstr, const char* sep, std::vector<float>& arr, DimensionType& NumDim) {
char* current;
char* context = NULL;
int i = 0;
DimensionType i = 0;
double sum = 0;
arr.clear();
current = strtok_s(cstr, sep, &context);
@ -90,23 +90,23 @@ namespace SPTAG
}
// Rescales the `col` elements of `arr` in place so that their Euclidean length becomes `base`
// (up to the rounding implied by the cast back to T).
// If the input length is below 1e-6 the vector is treated as zero and every element is set to
// base / sqrt(col), which again has length `base`.
// NOTE(review): rendered diff without +/- markers; the lines using `int` for col / j look like the
// removed forms and the `DimensionType` lines their replacements - confirm against the repo.
template <typename T>
static void Normalize(T* arr, int col, int base) {
static void Normalize(T* arr, DimensionType col, int base) {
// Accumulate the squared length in double regardless of T.
double vecLen = 0;
for (int j = 0; j < col; j++) {
for (DimensionType j = 0; j < col; j++) {
double val = arr[j];
vecLen += val * val;
}
vecLen = std::sqrt(vecLen);
if (vecLen < 1e-6) {
// Degenerate (near-zero) vector: substitute the uniform vector of length `base`.
T val = (T)(1.0 / std::sqrt((double)col) * base);
for (int j = 0; j < col; j++) arr[j] = val;
for (DimensionType j = 0; j < col; j++) arr[j] = val;
}
else {
for (int j = 0; j < col; j++) arr[j] = (T)(arr[j] / vecLen * base);
for (DimensionType j = 0; j < col; j++) arr[j] = (T)(arr[j] / vecLen * base);
}
}
static size_t ProcessLine(std::string& currentLine, std::vector<float>& arr, int& D, int base, DistCalcMethod distCalcMethod) {
static size_t ProcessLine(std::string& currentLine, std::vector<float>& arr, DimensionType& D, int base, DistCalcMethod distCalcMethod) {
size_t index;
double vecLen;
if (currentLine.length() == 0 || (index = currentLine.find_last_of("\t")) == std::string::npos || (vecLen = GetVector(const_cast<char*>(currentLine.c_str() + index + 1), "|", arr, D)) < -1) {
@ -121,10 +121,10 @@ namespace SPTAG
}
template <typename T>
static void PrepareQuerys(std::ifstream& inStream, std::vector<std::string>& qString, std::vector<std::vector<T>>& Query, int& NumQuery, int& NumDim, DistCalcMethod distCalcMethod, int base) {
static void PrepareQuerys(std::ifstream& inStream, std::vector<std::string>& qString, std::vector<std::vector<T>>& Query, SizeType& NumQuery, DimensionType& NumDim, DistCalcMethod distCalcMethod, int base) {
std::string currentLine;
std::vector<float> arr;
int i = 0;
SizeType i = 0;
size_t index;
while ((NumQuery < 0 || i < NumQuery) && !inStream.eof()) {
std::getline(inStream, currentLine);
@ -132,9 +132,9 @@ namespace SPTAG
continue;
}
qString.push_back(currentLine.substr(0, index));
if (Query.size() < i + 1) Query.push_back(std::vector<T>(NumDim, 0));
if ((SizeType)Query.size() < i + 1) Query.push_back(std::vector<T>(NumDim, 0));
for (int j = 0; j < NumDim; j++) Query[i][j] = (T)arr[j];
for (DimensionType j = 0; j < NumDim; j++) Query[i][j] = (T)arr[j];
i++;
}
NumQuery = i;
@ -149,12 +149,12 @@ namespace SPTAG
return 1;
}
static inline void AddNeighbor(int idx, float dist, int *neighbors, float *dists, int size)
static inline void AddNeighbor(SizeType idx, float dist, SizeType *neighbors, float *dists, DimensionType size)
{
size--;
if (dist < dists[size] || (dist == dists[size] && idx < neighbors[size]))
{
int nb;
DimensionType nb;
for (nb = 0; nb <= size && neighbors[nb] != idx; nb++);
if (nb > size)

View File

@ -13,158 +13,18 @@ namespace SPTAG
{
namespace COMMON
{
// Size in bytes (2^30 = 1 GiB) of the scratch buffer the merge routines allocate for file copies.
// Both spellings denote the same value; in this rendered diff the first looks like the removed
// line and the second its replacement - confirm against the repo.
const int bufsize = 1024 * 1024 * 1024;
const int bufsize = 1 << 30;
class DataUtils {
public:
// Converts one slice of a TSV input file into three per-worker binary part files.
//
// The slice starts at byte `id * blocksize`. Every worker except id 0 first discards the line it
// lands in (it may be partial), then whole lines are consumed until more than `blocksize` bytes
// have been read or the input ends. For each line accepted by Utils::ProcessLine this appends:
//   outfile          + (id + threadbase): the D parsed components, each cast to T
//   outmetafile      + (id + threadbase): the first `index` bytes of the line (its metadata text)
//   outmetaindexfile + (id + threadbase): the byte offset of that metadata within the part file
// The meta-index part ends with the final offset followed by the int sample count, which is the
// trailer MergeData reads back.
//
// Parameters:
//   id, threadbase : worker number within this input file / number of parts already produced
//   blocksize      : bytes of input assigned to each worker
//   numSamples     : shared counter; the number of samples written here is added atomically
//   D              : vector dimension, passed through to Utils::ProcessLine
//                    NOTE(review): D is a plain int& shared by all workers - presumably every
//                    line has the same dimension; verify, since concurrent writes would race.
//   distCalcMethod : for Cosine, Utils::GetBase<T>() is passed to ProcessLine as the scale base
// Throws MyException if the input file or any of the output part files cannot be opened.
template <typename T>
static void ProcessTSVData(int id, int threadbase, std::uint64_t blocksize,
    std::string filename, std::string outfile, std::string outmetafile, std::string outmetaindexfile,
    std::atomic_int& numSamples, int& D, DistCalcMethod distCalcMethod) {
    std::ifstream inputStream(filename);
    if (!inputStream.is_open()) {
        std::cerr << "unable to open file " + filename << std::endl;
        throw MyException("unable to open file " + filename);
    }

    std::ofstream outputStream, metaStream_out, metaStream_index;
    outputStream.open(outfile + std::to_string(id + threadbase), std::ofstream::binary);
    metaStream_out.open(outmetafile + std::to_string(id + threadbase), std::ofstream::binary);
    metaStream_index.open(outmetaindexfile + std::to_string(id + threadbase), std::ofstream::binary);
    if (!outputStream.is_open() || !metaStream_out.is_open() || !metaStream_index.is_open()) {
        std::cerr << "unable to open output file " << outfile << " " << outmetafile << " " << outmetaindexfile << std::endl;
        throw MyException("unable to open output files");
    }

    std::vector<float> arr;
    std::vector<T> sample;

    int base = 1;
    if (distCalcMethod == DistCalcMethod::Cosine) {
        base = Utils::GetBase<T>();
    }

    std::uint64_t writepos = 0;
    int sampleSize = 0;
    std::uint64_t totalread = 0;
    std::streamoff startpos = id * blocksize;

    // Bytes charged per line for its terminator (the original used 1 on every platform).
    const size_t enter_size = 1;

    std::string currentLine;
    size_t index;
    inputStream.seekg(startpos, std::ifstream::beg);
    if (id != 0) {
        // Skip the line this worker landed in; the previous worker reads past its
        // block boundary to finish it.
        std::getline(inputStream, currentLine);
        totalread += currentLine.length() + enter_size;
    }
    std::cout << "Begin thread " << id << " begin at:" << (startpos + totalread) << std::endl;

    // Reading inside the loop condition stops on EOF *and* on any other stream failure;
    // the previous `while (!eof())` form kept spinning if the stream failed without EOF.
    while (totalread <= blocksize && std::getline(inputStream, currentLine)) {
        const size_t consumed = currentLine.length() + enter_size;
        if (currentLine.length() <= enter_size) {
            totalread += consumed;
            continue;
        }

        index = Utils::ProcessLine(currentLine, arr, D, base, distCalcMethod);
        // ProcessLine returns size_t, so the former `< 0` test could never fire. `index` is used
        // below as a byte count into currentLine, so any value beyond the line is rejected; this
        // also covers a (size_t)-1 style failure value - TODO confirm ProcessLine's failure return.
        if (index > currentLine.length()) {
            totalread += consumed;
            continue;
        }

        sample.resize(D);
        for (int j = 0; j < D; j++) sample[j] = (T)arr[j];

        outputStream.write((char *)(sample.data()), sizeof(T)*D);
        metaStream_index.write((char *)&writepos, sizeof(std::uint64_t));
        metaStream_out.write(currentLine.c_str(), index);

        writepos += index;
        sampleSize += 1;
        totalread += consumed;
    }

    // Trailer: end offset of the last metadata entry, then the sample count.
    metaStream_index.write((char *)&writepos, sizeof(std::uint64_t));
    metaStream_index.write((char *)&sampleSize, sizeof(int));
    inputStream.close();
    outputStream.close();
    metaStream_out.close();
    metaStream_index.close();

    numSamples.fetch_add(sampleSize);

    std::cout << "Finish Thread[" << id << ", " << sampleSize << "] at:" << (startpos + totalread) << std::endl;
}
// Concatenates the per-worker part files written by ProcessTSVData into the final files and
// deletes the parts.
//
//   outfile          : [int numSamples][int D] followed by the bytes of outfile0 .. outfile(threadbase-1)
//   outmetafile      : the bytes of outmetafile0 .. outmetafile(threadbase-1)
//   outmetaindexfile : [int numSamples], then each part's offsets shifted by the total metadata
//                      length of the parts before it, then one final end offset
//
// Fixes relative to the previous version:
//   - a part file that could not be opened made `while (!eof())` spin forever; it is now
//     reported on std::cerr and skipped
//   - the sample count of an unreadable index part was used uninitialised
//   - the std::atomic_int was written by reinterpreting its address; its value is loaded first
//   - scratch buffers are owned by RAII objects, so no exit path leaks them
// If an output file cannot be created the function reports it and returns without deleting
// the part files that feed it.
static void MergeData(int threadbase, std::string outfile, std::string outmetafile, std::string outmetaindexfile,
    std::atomic_int& numSamples, int D) {
    const int totalSamples = numSamples.load();

    std::unique_ptr<char[]> bufferHolder(new char[bufsize]);
    char* buf = bufferHolder.get();

    // Appends <prefix>0 .. <prefix>(threadbase - 1) to `merged`, removing each part afterwards.
    auto appendParts = [&](std::ofstream& merged, const std::string& prefix) {
        for (int i = 0; i < threadbase; i++) {
            const std::string file = prefix + std::to_string(i);
            std::ifstream part(file, std::ifstream::binary);
            if (!part.is_open()) {
                std::cerr << "unable to open part file " << file << std::endl;
            }
            else {
                // The final short read fails but still reports its bytes through gcount().
                while (part.read(buf, bufsize) || part.gcount() > 0) {
                    merged.write(buf, part.gcount());
                }
                part.close();
            }
            remove(file.c_str());
        }
    };

    {
        std::ofstream vectorStream(outfile, std::ofstream::binary);
        if (!vectorStream.is_open()) {
            std::cerr << "unable to open output file " << outfile << std::endl;
            return;
        }
        vectorStream.write((const char *)&totalSamples, sizeof(int));
        vectorStream.write((const char *)&D, sizeof(int));
        appendParts(vectorStream, outfile);
    }

    {
        std::ofstream metaStream(outmetafile, std::ofstream::binary);
        if (!metaStream.is_open()) {
            std::cerr << "unable to open output file " << outmetafile << std::endl;
            return;
        }
        appendParts(metaStream, outmetafile);
    }

    // The 1 GiB copy buffer is no longer needed for the index merge.
    bufferHolder.reset();

    std::ofstream indexStream(outmetaindexfile, std::ofstream::binary);
    if (!indexStream.is_open()) {
        std::cerr << "unable to open output file " << outmetaindexfile << std::endl;
        return;
    }
    indexStream.write((const char *)&totalSamples, sizeof(int));

    int metaSamples = 0;
    std::uint64_t lastoff = 0;
    std::vector<std::uint64_t> offsets;
    for (int i = 0; i < threadbase; i++) {
        const std::string file = outmetaindexfile + std::to_string(i);
        std::ifstream part(file, std::ifstream::binary);

        // Each part ends with its int sample count; the offsets (count + 1 of them) start at byte 0.
        int partSamples = 0;
        bool ok = part.is_open();
        if (ok) {
            part.seekg(-((long long)sizeof(int)), part.end);
            part.read((char *)&partSamples, sizeof(int));
            ok = !part.fail() && partSamples >= 0;
        }
        if (ok) {
            offsets.assign((size_t)partSamples + 1, 0);
            part.seekg(0, part.beg);
            part.read((char *)offsets.data(), sizeof(std::uint64_t) * (partSamples + 1));
            ok = !part.fail();
        }
        part.close();
        remove(file.c_str());
        if (!ok) {
            std::cerr << "unable to read meta index part file " << file << std::endl;
            continue;
        }

        // Rebase this part's offsets onto the end of everything merged so far. The last entry
        // is the part's end offset: it is not written here but becomes the next base.
        for (std::uint64_t& offset : offsets) offset += lastoff;
        indexStream.write((const char *)offsets.data(), sizeof(std::uint64_t) * partSamples);
        lastoff = offsets[partSamples];
        metaSamples += partSamples;
    }
    indexStream.write((const char *)&lastoff, sizeof(std::uint64_t));
    indexStream.close();

    std::cout << "numSamples:" << totalSamples << " metaSamples:" << metaSamples << " D:" << D << std::endl;
}
static bool MergeIndex(const std::string& p_vectorfile1, const std::string& p_metafile1, const std::string& p_metaindexfile1,
const std::string& p_vectorfile2, const std::string& p_metafile2, const std::string& p_metaindexfile2) {
std::ifstream inputStream1, inputStream2;
std::ofstream outputStream;
char * buf = new char[bufsize];
int R1, R2, C1, C2;
std::unique_ptr<char[]> bufferHolder(new char[bufsize]);
char * buf = bufferHolder.get();
SizeType R1, R2;
DimensionType C1, C2;
#define MergeVector(inputStream, vectorFile, R, C) \
inputStream.open(vectorFile, std::ifstream::binary); \
@ -172,8 +32,8 @@ namespace SPTAG
std::cout << "Cannot open vector file: " << vectorFile <<"!" << std::endl; \
return false; \
} \
inputStream.read((char *)&(R), sizeof(int)); \
inputStream.read((char *)&(C), sizeof(int)); \
inputStream.read((char *)&(R), sizeof(SizeType)); \
inputStream.read((char *)&(C), sizeof(DimensionType)); \
MergeVector(inputStream1, p_vectorfile1, R1, C1)
MergeVector(inputStream2, p_vectorfile2, R2, C2)
@ -185,8 +45,8 @@ namespace SPTAG
}
R1 += R2;
outputStream.open(p_vectorfile1 + "_tmp", std::ofstream::binary);
outputStream.write((char *)&R1, sizeof(int));
outputStream.write((char *)&C1, sizeof(int));
outputStream.write((char *)&R1, sizeof(SizeType));
outputStream.write((char *)&C1, sizeof(DimensionType));
while (!inputStream1.eof()) {
inputStream1.read(buf, bufsize);
outputStream.write(buf, inputStream1.gcount());
@ -218,26 +78,22 @@ namespace SPTAG
outputStream.close();
delete[] buf;
std::uint64_t * offsets;
int partSamples;
std::uint64_t * offsets = reinterpret_cast<std::uint64_t*>(buf);
std::uint64_t lastoff = 0;
outputStream.open(p_metaindexfile1 + "_tmp", std::ofstream::binary);
outputStream.write((char *)&R1, sizeof(int));
outputStream.write((char *)&R1, sizeof(SizeType));
#define MergeMetaIndex(inputStream, metaIndexFile) \
inputStream.open(metaIndexFile, std::ifstream::binary); \
if (!inputStream.is_open()) { \
std::cout << "Cannot open meta index file: " << metaIndexFile << "!" << std::endl; \
return false; \
} \
inputStream.read((char *)&partSamples, sizeof(int)); \
offsets = new std::uint64_t[partSamples + 1]; \
inputStream.read((char *)offsets, sizeof(std::uint64_t)*(partSamples + 1)); \
inputStream.read((char *)&R2, sizeof(SizeType)); \
inputStream.read((char *)offsets, sizeof(std::uint64_t)*(R2 + 1)); \
inputStream.close(); \
for (int j = 0; j < partSamples + 1; j++) offsets[j] += lastoff; \
outputStream.write((char *)offsets, sizeof(std::uint64_t)*partSamples); \
lastoff = offsets[partSamples]; \
delete[] offsets; \
for (SizeType j = 0; j < R2 + 1; j++) offsets[j] += lastoff; \
outputStream.write((char *)offsets, sizeof(std::uint64_t)*R2); \
lastoff = offsets[R2]; \
MergeMetaIndex(inputStream1, p_metaindexfile1)
MergeMetaIndex(inputStream2, p_metaindexfile2)
@ -253,36 +109,6 @@ namespace SPTAG
std::cout << "Merged -> numSamples:" << R1 << " D:" << C1 << std::endl;
return true;
}
// Parses one or more TSV files (comma-separated in `filenames`) into the merged vector, metadata
// and meta-index files.
//
// Each input file is split into `threadnum` byte ranges of equal size (rounded up); one OpenMP
// iteration per range runs ProcessTSVData, writing part files numbered from `threadbase`.
// After all inputs are processed MergeData stitches the `threadbase` parts together.
//
// Parameters:
//   filenames       : comma-separated list of input paths
//   outfile / outmetafile / outmetaindexfile : destination paths (also used as part-file prefixes)
//   threadnum       : number of OpenMP threads and of byte ranges per input file
//                     NOTE(review): must be > 0 - it is used as a divisor and is not validated here.
//   distCalcMethod  : forwarded to ProcessTSVData
//
// Change from the previous version: the result of stat()/_stat64() is now checked. Before,
// a failed call left st_size uninitialised and the block size was computed from garbage.
// On failure the size is treated as 0 and a diagnostic is printed; ProcessTSVData still
// performs the open and raises its own error if the file is really missing.
template <typename T>
static void ParseData(std::string filenames, std::string outfile, std::string outmetafile, std::string outmetaindexfile,
    int threadnum, DistCalcMethod distCalcMethod) {
    omp_set_num_threads(threadnum);

    std::atomic_int numSamples = { 0 };
    int D = -1;

    int threadbase = 0;
    std::vector<std::string> inputFileNames = Helper::StrUtils::SplitString(filenames, ",");
    for (const std::string& inputFileName : inputFileNames)
    {
        std::uint64_t fileSize = 0;
#ifndef _MSC_VER
        struct stat stat_buf;
        const int statResult = stat(inputFileName.c_str(), &stat_buf);
#else
        struct _stat64 stat_buf;
        const int statResult = _stat64(inputFileName.c_str(), &stat_buf);
#endif
        if (statResult == 0) {
            fileSize = (std::uint64_t)stat_buf.st_size;
        }
        else {
            std::cerr << "unable to stat file " << inputFileName << std::endl;
        }
        // Ceiling division so that threadnum blocks cover the whole file.
        std::uint64_t blocksize = (fileSize + threadnum - 1) / threadnum;

#pragma omp parallel for
        for (int i = 0; i < threadnum; i++) {
            ProcessTSVData<T>(i, threadbase, blocksize, inputFileName, outfile, outmetafile, outmetaindexfile, numSamples, D, distCalcMethod);
        }
        // Part files are numbered consecutively across input files.
        threadbase += threadnum;
    }
    MergeData(threadbase, outfile, outmetafile, outmetaindexfile, numSamples, D);
}
};
}
}

View File

@ -28,23 +28,31 @@ namespace SPTAG
class Dataset
{
private:
int rows;
int cols;
std::string name = "Data";
SizeType rows = 0;
DimensionType cols = 1;
bool ownData = false;
T* data = nullptr;
std::vector<T> dataIncremental;
SizeType incRows = 0;
std::vector<T*> incBlocks;
static const SizeType rowsInBlock = 1024 * 1024;
public:
// NOTE(review): rendered diff without +/- markers. The first two lines look like the removed
// constructor signatures (int-based); the definitions that follow are their replacements -
// confirm against the repo.
Dataset(): rows(0), cols(1) {}
Dataset(int rows_, int cols_, T* data_ = nullptr, bool transferOnwership_ = true)
// Default constructor: leaves the dataset empty and reserves capacity in the block-pointer
// table for MaxSize / rowsInBlock + 1 entries.
Dataset()
{
incBlocks.reserve(MaxSize / rowsInBlock + 1);
}
// Constructs via Initialize(rows_, cols_, data_, transferOnwership_) and makes the same
// block-pointer reservation as the default constructor.
Dataset(SizeType rows_, DimensionType cols_, T* data_ = nullptr, bool transferOnwership_ = true)
{
Initialize(rows_, cols_, data_, transferOnwership_);
incBlocks.reserve(MaxSize / rowsInBlock + 1);
}
// Frees the base buffer only when this object allocated it (ownData), and always frees
// every incremental block.
~Dataset()
{
if (ownData) aligned_free(data);
for (T* ptr : incBlocks) aligned_free(ptr);
incBlocks.clear();
}
void Initialize(int rows_, int cols_, T* data_ = nullptr, bool transferOnwership_ = true)
void Initialize(SizeType rows_, DimensionType cols_, T* data_ = nullptr, bool transferOnwership_ = true)
{
rows = rows_;
cols = cols_;
@ -52,161 +60,166 @@ namespace SPTAG
if (data_ == nullptr || !transferOnwership_)
{
ownData = true;
data = (T*)aligned_malloc(sizeof(T) * rows * cols, ALIGN);
if (data_ != nullptr) memcpy(data, data_, rows * cols * sizeof(T));
else std::memset(data, -1, rows * cols * sizeof(T));
data = (T*)aligned_malloc(((size_t)rows) * cols * sizeof(T), ALIGN);
if (data_ != nullptr) memcpy(data, data_, ((size_t)rows) * cols * sizeof(T));
else std::memset(data, -1, ((size_t)rows) * cols * sizeof(T));
}
}
// NOTE(review): rendered diff without +/- markers. `SetR(int)` and the dataIncremental statements
// look like the removed form; `SetName`, `SetR(SizeType)` and the incRows statements look like
// the added form - confirm against the repo.
void SetR(int R_)
// Sets the label that Save/Load print in their log messages.
void SetName(const std::string name_) { name = name_; }
// Sets the logical row count to R_.
// R_ >= rows: the base block keeps its size and the surplus becomes the incremental row count.
// R_ <  rows: the base row count is cut to R_ and the incremental rows are dropped.
// No block memory is allocated or freed in the lines shown here.
void SetR(SizeType R_)
{
if (R_ >= rows)
dataIncremental.resize((R_ - rows) * cols);
else
incRows = R_ - rows;
else
{
rows = R_;
dataIncremental.clear();
incRows = 0;
}
}
// NOTE(review): rendered diff without +/- markers. The `int`-typed accessors and the
// dataIncremental-based returns look like the removed forms; the SizeType / DimensionType
// accessors, BufferSize(), At() and the operator[] pair that forward to At() look like the
// added forms - confirm against the repo.
inline int R() const { return (int)(rows + dataIncremental.size() / cols); }
inline int C() const { return cols; }
T* operator[](int index)
// Total row count: rows in the base buffer plus rows appended incrementally.
inline SizeType R() const { return rows + incRows; }
// Number of elements per row.
inline DimensionType C() const { return cols; }
// Bytes Save(std::ostream&) writes: the two header fields plus R() * C() elements of T.
inline std::uint64_t BufferSize() const { return sizeof(SizeType) + sizeof(DimensionType) + sizeof(T) * R() * C(); }
// Pointer to the first element of row `index`. Rows below `rows` live in the contiguous base
// buffer; later rows live in fixed-size blocks of rowsInBlock rows each. No bounds check.
inline const T* At(SizeType index) const
{
if (index >= rows) {
return dataIncremental.data() + (size_t)(index - rows)*cols;
// Block number = incIndex / rowsInBlock, row within that block = incIndex % rowsInBlock.
SizeType incIndex = index - rows;
return incBlocks[incIndex / rowsInBlock] + ((size_t)(incIndex % rowsInBlock)) * cols;
}
return data + (size_t)index*cols;
return data + ((size_t)index) * cols;
}
const T* operator[](int index) const
// Mutable row access; casts away the constness of the pointer returned by At().
T* operator[](SizeType index)
{
if (index >= rows) {
return dataIncremental.data() + (size_t)(index - rows)*cols;
}
return data + (size_t)index*cols;
return (T*)At(index);
}
// Read-only row access.
const T* operator[](SizeType index) const
{
return At(index);
}
// Appends `num` rows copied from pData (num * cols contiguous elements) after the existing rows.
// Returns ErrorCode::MemoryOverFlow if the new total would exceed MaxSize or a block allocation
// fails, ErrorCode::Success otherwise.
// NOTE(review): when an allocation fails part-way, rows already copied into earlier blocks are
// not counted (incRows is only updated at the end) - confirm that is the intended behaviour.
// NOTE(review): rendered diff without +/- markers. The `void` signature and the
// dataIncremental.insert line look like the removed form; everything else is the added
// block-based form - confirm against the repo.
void AddBatch(const T* pData, int num)
ErrorCode AddBatch(const T* pData, SizeType num)
{
dataIncremental.insert(dataIncremental.end(), pData, pData + num*cols);
// Written as R() > MaxSize - num rather than R() + num > MaxSize so the sum cannot overflow.
if (R() > MaxSize - num) return ErrorCode::MemoryOverFlow;
SizeType written = 0;
while (written < num) {
// Locate the block that holds the next free incremental row; allocate it on first use.
SizeType curBlockIdx = (incRows + written) / rowsInBlock;
if (curBlockIdx >= (SizeType)incBlocks.size()) {
T* newBlock = (T*)aligned_malloc(((size_t)rowsInBlock) * cols * sizeof(T), ALIGN);
if (newBlock == nullptr) return ErrorCode::MemoryOverFlow;
incBlocks.push_back(newBlock);
}
// Copy as many rows as fit in the remainder of this block.
SizeType curBlockPos = (incRows + written) % rowsInBlock;
SizeType toWrite = min(rowsInBlock - curBlockPos, num - written);
std::memcpy(incBlocks[curBlockIdx] + ((size_t)curBlockPos) * cols, pData + ((size_t)written) * cols, ((size_t)toWrite) * cols * sizeof(T));
written += toWrite;
}
incRows += written;
return ErrorCode::Success;
}
// Appends `num` rows whose bytes are all set to 0xFF (memset with -1); for signed integer T
// each element therefore reads as -1.
// Returns ErrorCode::MemoryOverFlow if the new total would exceed MaxSize or a block allocation
// fails, ErrorCode::Success otherwise.
// NOTE(review): rendered diff without +/- markers. The `void` signature and the
// dataIncremental.insert line look like the removed form; everything else is the added
// block-based form - confirm against the repo.
void AddBatch(int num)
ErrorCode AddBatch(SizeType num)
{
dataIncremental.insert(dataIncremental.end(), (size_t)num*cols, T(-1));
// Written as R() > MaxSize - num rather than R() + num > MaxSize so the sum cannot overflow.
if (R() > MaxSize - num) return ErrorCode::MemoryOverFlow;
SizeType written = 0;
while (written < num) {
// Locate the block that holds the next free incremental row; allocate it on first use.
SizeType curBlockIdx = (incRows + written) / rowsInBlock;
if (curBlockIdx >= (SizeType)incBlocks.size()) {
T* newBlock = (T*)aligned_malloc(((size_t)rowsInBlock) * cols * sizeof(T), ALIGN);
if (newBlock == nullptr) return ErrorCode::MemoryOverFlow;
incBlocks.push_back(newBlock);
}
// Fill as many rows as fit in the remainder of this block.
SizeType curBlockPos = (incRows + written) % rowsInBlock;
SizeType toWrite = min(rowsInBlock - curBlockPos, num - written);
std::memset(incBlocks[curBlockIdx] + ((size_t)curBlockPos) * cols, -1, ((size_t)toWrite) * cols * sizeof(T));
written += toWrite;
}
incRows += written;
return ErrorCode::Success;
}
// Writes the dataset as [row count][column count][row data]: first the base buffer, then the
// incremental rows.
// NOTE(review): rendered diff without +/- markers. The FILE*/fwrite statements (and the
// Save(std::string) signature on the first line) look like the removed file-based
// implementation; the p_outstream statements are the added stream-based implementation -
// confirm against the repo.
// The stream-based variant does not inspect the stream state and always returns true.
bool Save(std::string sDataPointsFileName)
bool Save(std::ostream& p_outstream) const
{
std::cout << "Save Data To " << sDataPointsFileName << std::endl;
FILE * fp = fopen(sDataPointsFileName.c_str(), "wb");
if (fp == NULL) return false;
// Header: total rows (base + incremental), then columns; followed by the base buffer.
SizeType CR = R();
p_outstream.write((char*)&CR, sizeof(SizeType));
p_outstream.write((char*)&cols, sizeof(DimensionType));
p_outstream.write((char*)data, sizeof(T) * cols * rows);
int CR = R();
fwrite(&CR, sizeof(int), 1, fp);
fwrite(&cols, sizeof(int), 1, fp);
// Completely filled incremental blocks are written whole ...
SizeType blocks = incRows / rowsInBlock;
for (int i = 0; i < blocks; i++)
p_outstream.write((char*)incBlocks[i], sizeof(T) * cols * rowsInBlock);
// Removed variant: fwrite may write fewer rows than requested, so it loops until done.
T* ptr = data;
int toWrite = rows;
while (toWrite > 0)
{
size_t write = fwrite(ptr, sizeof(T) * cols, toWrite, fp);
ptr += write * cols;
toWrite -= (int)write;
}
ptr = dataIncremental.data();
toWrite = CR - rows;
while (toWrite > 0)
{
size_t write = fwrite(ptr, sizeof(T) * cols, toWrite, fp);
ptr += write * cols;
toWrite -= (int)write;
}
fclose(fp);
std::cout << "Save Data (" << CR << ", " << cols << ") Finish!" << std::endl;
// ... and the partially filled last block contributes only its used rows.
SizeType remain = incRows % rowsInBlock;
if (remain > 0) p_outstream.write((char*)incBlocks[blocks], sizeof(T) * cols * remain);
std::cout << "Save " << name << " (" << CR << ", " << cols << ") Finish!" << std::endl;
return true;
}
// NOTE(review): rendered diff without +/- markers. The malloc/memcpy statements look like the
// removed in-memory serializer Save(void**, int64_t&); the last six statements are the added
// Save(std::string), which opens the file and delegates to Save(std::ostream&) - confirm
// against the repo.
// The removed variant hands the malloc()ed buffer out through *pDataPointsMemFile and does
// not free it, so presumably the caller owns it - verify.
bool Save(void **pDataPointsMemFile, int64_t &len)
bool Save(std::string sDataPointsFileName) const
{
// Two int header fields followed by R() * cols elements.
size_t size = sizeof(int) + sizeof(int) + sizeof(T) * R() *cols;
char *mem = (char*)malloc(size);
if (mem == NULL) return false;
int CR = R();
auto header = (int*)mem;
header[0] = CR;
header[1] = cols;
// The payload starts right after the 8 header bytes (2 * sizeof(int) where int is 4 bytes).
auto body = &mem[8];
memcpy(body, data, sizeof(T) * cols * rows);
body += sizeof(T) * cols * rows;
memcpy(body, dataIncremental.data(), sizeof(T) * cols * (CR - rows));
body += sizeof(T) * cols * (CR - rows);
*pDataPointsMemFile = mem;
len = size;
std::cout << "Save " << name << " To " << sDataPointsFileName << std::endl;
std::ofstream output(sDataPointsFileName, std::ios::binary);
if (!output.is_open()) return false;
// NOTE(review): the bool returned by Save(output) is discarded; this returns true whenever
// the file could be opened.
Save(output);
output.close();
return true;
}
// Loads the dataset from the binary file sDataPointsFileName.
// NOTE(review): this span is a rendered diff without +/- markers; the FILE*/fread
// statements and the std::ifstream statements belong to two different revisions.
//
// std::ifstream statements: read `rows` (SizeType) and `cols` (DimensionType) straight
// into the members, call Initialize(rows, cols), then read rows * cols elements of T
// into `data`. Returns false only when the file cannot be opened; the individual reads
// are not checked.
bool Load(std::string sDataPointsFileName)
{
std::cout << "Load Data From " << sDataPointsFileName << std::endl;
FILE * fp = fopen(sDataPointsFileName.c_str(), "rb");
if (fp == NULL) return false;
std::cout << "Load " << name << " From " << sDataPointsFileName << std::endl;
std::ifstream input(sDataPointsFileName, std::ios::binary);
if (!input.is_open()) return false;
// FILE*-based statements: header is two ints, row count then column count.
int R, C;
fread(&R, sizeof(int), 1, fp);
fread(&C, sizeof(int), 1, fp);
// Stream-based statements: header is read directly into the member fields.
input.read((char*)&rows, sizeof(SizeType));
input.read((char*)&cols, sizeof(DimensionType));
Initialize(R, C);
// Keep calling fread until all R rows have been read, advancing by the number of
// rows each call reports.
T* ptr = data;
while (R > 0) {
size_t read = fread(ptr, sizeof(T) * C, R, fp);
ptr += read * C;
R -= (int)read;
}
fclose(fp);
std::cout << "Load Data (" << rows << ", " << cols << ") Finish!" << std::endl;
Initialize(rows, cols);
input.read((char*)data, sizeof(T) * cols * rows);
input.close();
std::cout << "Load " << name << " (" << rows << ", " << cols << ") Finish!" << std::endl;
return true;
}
// Functions for loading models from memory mapped files
//
// Reads a row count and a column count from the start of pDataPointsMemFile, then calls
// Initialize(R, C, ptr) with the pointer positioned just past that header.
// NOTE(review): this span is a rendered diff without +/- markers; the int-typed header
// reads and the SizeType/DimensionType-typed header reads come from two different
// revisions (R and C are each declared and assigned twice).
// Always returns true; the buffer length is not passed in, so nothing here can bound-check it.
bool Load(char* pDataPointsMemFile)
{
int R, C;
R = *((int*)pDataPointsMemFile);
pDataPointsMemFile += sizeof(int);
SizeType R;
DimensionType C;
R = *((SizeType*)pDataPointsMemFile);
pDataPointsMemFile += sizeof(SizeType);
C = *((int*)pDataPointsMemFile);
pDataPointsMemFile += sizeof(int);
C = *((DimensionType*)pDataPointsMemFile);
pDataPointsMemFile += sizeof(DimensionType);
// The remaining bytes are handed to Initialize as the row data.
// NOTE(review): presumably Initialize adopts this pointer rather than copying - confirm
// against Initialize, which is not visible here.
Initialize(R, C, (T*)pDataPointsMemFile);
std::cout << "Load " << name << " (" << R << ", " << C << ") Finish!" << std::endl;
return true;
}
// Writes only the rows selected by `indices`, in the order given, using the same header
// layout as Save: row count, column count, then one row of `cols` elements per index.
// NOTE(review): this span is a rendered diff without +/- markers; the FILE*/fwrite
// statements (file-name signature) and the std::ostream statements (stream signature)
// belong to two different revisions.
// The stream statements do not check the stream state; the visible return value is
// always true.
bool Refine(const std::vector<int>& indices, std::string sDataPointsFileName)
bool Refine(const std::vector<SizeType>& indices, std::ostream& output)
{
std::cout << "Save Refine Data To " << sDataPointsFileName << std::endl;
FILE * fp = fopen(sDataPointsFileName.c_str(), "wb");
if (fp == NULL) return false;
// Header: number of selected rows, then the column count.
SizeType R = (SizeType)(indices.size());
output.write((char*)&R, sizeof(SizeType));
output.write((char*)&cols, sizeof(DimensionType));
int R = (int)(indices.size());
fwrite(&R, sizeof(int), 1, fp);
fwrite(&cols, sizeof(int), 1, fp);
// write point one by one in case for cache miss
for (int i = 0; i < R; i++) {
// FILE*-based statements: indices below `rows` address `data`, the rest address
// dataIncremental (offset by `rows`).
if (indices[i] < rows)
fwrite(data + (size_t)indices[i] * cols, sizeof(T) * cols, 1, fp);
else
fwrite(dataIncremental.data() + (size_t)(indices[i] - rows) * cols, sizeof(T) * cols, 1, fp);
// Stream-based statements: At(index) supplies the row pointer.
for (SizeType i = 0; i < R; i++) {
output.write((char*)At(indices[i]), sizeof(T) * cols);
}
fclose(fp);
std::cout << "Save Refine " << name << " (" << R << ", " << cols << ") Finish!" << std::endl;
return true;
}
std::cout << "Save Refine Data (" << R << ", " << cols << ") Finish!" << std::endl;
// File-name convenience overload: opens sDataPointsFileName in binary mode and forwards
// to Refine(indices, output).
// Returns false only if the file cannot be opened.
bool Refine(const std::vector<SizeType>& indices, std::string sDataPointsFileName)
{
std::cout << "Save Refine " << name << " To " << sDataPointsFileName << std::endl;
std::ofstream output(sDataPointsFileName, std::ios::binary);
if (!output.is_open()) return false;
// NOTE(review): the bool returned by the stream overload is discarded here.
Refine(indices, output);
output.close();
return true;
}
};

View File

@ -199,7 +199,7 @@ namespace SPTAG
#endif
/*
template<typename T>
static float ComputeL2Distance(const T *pX, const T *pY, int length)
static float ComputeL2Distance(const T *pX, const T *pY, DimensionType length)
{
float diff = 0;
const T* pEnd1 = pX + length;
@ -217,7 +217,7 @@ namespace SPTAG
result = acc(result, exec(c1, c2)); \
} \
static float ComputeL2Distance(const std::int8_t *pX, const std::int8_t *pY, int length)
static float ComputeL2Distance(const std::int8_t *pX, const std::int8_t *pY, DimensionType length)
{
const std::int8_t* pEnd32 = pX + ((length >> 5) << 5);
const std::int8_t* pEnd16 = pX + ((length >> 4) << 4);
@ -258,7 +258,7 @@ namespace SPTAG
return diff;
}
static float ComputeL2Distance(const std::uint8_t *pX, const std::uint8_t *pY, int length)
static float ComputeL2Distance(const std::uint8_t *pX, const std::uint8_t *pY, DimensionType length)
{
const std::uint8_t* pEnd32 = pX + ((length >> 5) << 5);
const std::uint8_t* pEnd16 = pX + ((length >> 4) << 4);
@ -299,7 +299,7 @@ namespace SPTAG
return diff;
}
static float ComputeL2Distance(const std::int16_t *pX, const std::int16_t *pY, int length)
static float ComputeL2Distance(const std::int16_t *pX, const std::int16_t *pY, DimensionType length)
{
const std::int16_t* pEnd16 = pX + ((length >> 4) << 4);
const std::int16_t* pEnd8 = pX + ((length >> 3) << 3);
@ -341,7 +341,7 @@ namespace SPTAG
return diff;
}
static float ComputeL2Distance(const float *pX, const float *pY, int length)
static float ComputeL2Distance(const float *pX, const float *pY, DimensionType length)
{
const float* pEnd16 = pX + ((length >> 4) << 4);
const float* pEnd4 = pX + ((length >> 2) << 2);
@ -389,14 +389,14 @@ namespace SPTAG
}
/*
template<typename T>
static float ComputeCosineDistance(const T *pX, const T *pY, int length) {
static float ComputeCosineDistance(const T *pX, const T *pY, DimensionType length) {
float diff = 0;
const T* pEnd1 = pX + length;
while (pX < pEnd1) diff += (*pX++) * (*pY++);
return 1 - diff;
}
*/
static float ComputeCosineDistance(const std::int8_t *pX, const std::int8_t *pY, int length) {
static float ComputeCosineDistance(const std::int8_t *pX, const std::int8_t *pY, DimensionType length) {
const std::int8_t* pEnd32 = pX + ((length >> 5) << 5);
const std::int8_t* pEnd16 = pX + ((length >> 4) << 4);
const std::int8_t* pEnd4 = pX + ((length >> 2) << 2);
@ -436,7 +436,7 @@ namespace SPTAG
return 16129 - diff;
}
static float ComputeCosineDistance(const std::uint8_t *pX, const std::uint8_t *pY, int length) {
static float ComputeCosineDistance(const std::uint8_t *pX, const std::uint8_t *pY, DimensionType length) {
const std::uint8_t* pEnd32 = pX + ((length >> 5) << 5);
const std::uint8_t* pEnd16 = pX + ((length >> 4) << 4);
const std::uint8_t* pEnd4 = pX + ((length >> 2) << 2);
@ -476,7 +476,7 @@ namespace SPTAG
return 65025 - diff;
}
static float ComputeCosineDistance(const std::int16_t *pX, const std::int16_t *pY, int length) {
static float ComputeCosineDistance(const std::int16_t *pX, const std::int16_t *pY, DimensionType length) {
const std::int16_t* pEnd16 = pX + ((length >> 4) << 4);
const std::int16_t* pEnd8 = pX + ((length >> 3) << 3);
const std::int16_t* pEnd4 = pX + ((length >> 2) << 2);
@ -517,7 +517,7 @@ namespace SPTAG
return 1073676289 - diff;
}
static float ComputeCosineDistance(const float *pX, const float *pY, int length) {
static float ComputeCosineDistance(const float *pX, const float *pY, DimensionType length) {
const float* pEnd16 = pX + ((length >> 4) << 4);
const float* pEnd4 = pX + ((length >> 2) << 2);
const float* pEnd1 = pX + length;
@ -564,7 +564,7 @@ namespace SPTAG
}
template<typename T>
static inline float ComputeDistance(const T *p1, const T *p2, int length, SPTAG::DistCalcMethod distCalcMethod)
static inline float ComputeDistance(const T *p1, const T *p2, DimensionType length, SPTAG::DistCalcMethod distCalcMethod)
{
if (distCalcMethod == SPTAG::DistCalcMethod::L2)
return ComputeL2Distance(p1, p2, length);
@ -588,7 +588,7 @@ namespace SPTAG
template<typename T>
float (*DistanceCalcSelector(SPTAG::DistCalcMethod p_method)) (const T*, const T*, int)
float (*DistanceCalcSelector(SPTAG::DistCalcMethod p_method)) (const T*, const T*, DimensionType)
{
switch (p_method)
{

View File

@ -16,30 +16,30 @@ namespace SPTAG
public:
FineGrainedLock() {}
// Releases every entry of `locks` via reset(), then empties the container.
// NOTE(review): rendered diff without +/- markers - the two `for` headers are the old
// (int index) and new (size_t index) revisions of the same loop.
~FineGrainedLock() {
for (int i = 0; i < locks.size(); i++)
for (size_t i = 0; i < locks.size(); i++)
locks[i].reset();
locks.clear();
}
// Changes the number of locks to n.
//  - Growing (current <= n): the container is resized first, then each new slot
//    [current, n) gets a freshly allocated std::mutex.
//  - Shrinking: slots [n, current) are reset() first, then the container is resized.
// NOTE(review): rendered diff without +/- markers - the int-typed and SizeType-typed
// lines are the old and new revisions of the same statements.
void resize(int n) {
int current = (int)locks.size();
void resize(SizeType n) {
SizeType current = (SizeType)locks.size();
if (current <= n) {
locks.resize(n);
for (int i = current; i < n; i++)
for (SizeType i = current; i < n; i++)
locks[i].reset(new std::mutex);
}
else {
for (int i = n; i < current; i++)
for (SizeType i = n; i < current; i++)
locks[i].reset();
locks.resize(n);
}
}
// Returns the mutex stored at slot idx by dereferencing locks[idx]; no bounds or null
// check is performed here.
// NOTE(review): rendered diff - the two signature lines are the old (int) and new
// (SizeType) revisions.
std::mutex& operator[](int idx) {
std::mutex& operator[](SizeType idx) {
return *locks[idx];
}
// Const counterpart: returns the mutex stored at slot idx by dereferencing locks[idx];
// no bounds or null check is performed here.
// NOTE(review): rendered diff - the two signature lines are the old (int) and new
// (SizeType) revisions.
const std::mutex& operator[](int idx) const {
const std::mutex& operator[](SizeType idx) const {
return *locks[idx];
}
private:

View File

@ -23,9 +23,9 @@ namespace SPTAG
// node type for storing KDT
// NOTE(review): rendered diff without +/- markers - the int/short fields and the
// SizeType/DimensionType fields are the old and new revisions of the same three members.
// Because KDTNode is written to and read from disk as raw bytes (see SaveTrees /
// LoadTrees), changing these field types changes the on-disk node layout.
struct KDTNode
{
int left;
int right;
short split_dim;
// Child links. A negative value is treated as a leaf by KDTSearch and decoded as
// sample id = -value - 1.
SizeType left;
SizeType right;
// Dimension compared against split_value when descending the tree.
DimensionType split_dim;
float split_value;
};
@ -39,18 +39,18 @@ namespace SPTAG
m_iSamples(other.m_iSamples) {}
~KDTree() {}
// Node accessors over m_pTreeRoots (no bounds checking) and the total node count.
// NOTE(review): rendered diff without +/- markers - each accessor appears twice, once
// with int (old) and once with SizeType (new).
inline const KDTNode& operator[](int index) const { return m_pTreeRoots[index]; }
inline KDTNode& operator[](int index) { return m_pTreeRoots[index]; }
inline const KDTNode& operator[](SizeType index) const { return m_pTreeRoots[index]; }
inline KDTNode& operator[](SizeType index) { return m_pTreeRoots[index]; }
inline int size() const { return (int)m_pTreeRoots.size(); }
inline SizeType size() const { return (SizeType)m_pTreeRoots.size(); }
template <typename T>
void BuildTrees(VectorIndex* p_index, std::vector<int>* indices = nullptr)
void BuildTrees(VectorIndex* p_index, std::vector<SizeType>* indices = nullptr)
{
std::vector<int> localindices;
std::vector<SizeType> localindices;
if (indices == nullptr) {
localindices.resize(p_index->GetNumSamples());
for (int i = 0; i < p_index->GetNumSamples(); i++) localindices[i] = i;
for (SizeType i = 0; i < p_index->GetNumSamples(); i++) localindices[i] = i;
}
else {
localindices.assign(indices->begin(), indices->end());
@ -63,58 +63,41 @@ namespace SPTAG
{
Sleep(i * 100); std::srand(clock());
std::vector<int> pindices(localindices.begin(), localindices.end());
std::vector<SizeType> pindices(localindices.begin(), localindices.end());
std::random_shuffle(pindices.begin(), pindices.end());
m_pTreeStart[i] = i * (int)pindices.size();
m_pTreeStart[i] = i * (SizeType)pindices.size();
std::cout << "Start to build KDTree " << i + 1 << std::endl;
int iTreeSize = m_pTreeStart[i];
DivideTree<T>(p_index, pindices, 0, (int)pindices.size() - 1, m_pTreeStart[i], iTreeSize);
SizeType iTreeSize = m_pTreeStart[i];
DivideTree<T>(p_index, pindices, 0, (SizeType)pindices.size() - 1, m_pTreeStart[i], iTreeSize);
std::cout << i + 1 << " KDTree built, " << iTreeSize - m_pTreeStart[i] << " " << pindices.size() << std::endl;
}
}
// NOTE(review): rendered diff without +/- markers. Three definitions are interleaved here:
//   - old  SaveTrees(void**, int64_t&): mallocs a buffer and memcpy's the trees into it;
//   - new  BufferSize(): byte count of the serialized form;
//   - new  SaveTrees(std::ostream&): writes the serialized form to a stream.
// Serialized layout (as written by the stream statements and counted by BufferSize):
//   int m_iTreeNumber | SizeType m_pTreeStart[m_iTreeNumber] |
//   SizeType treeNodeSize | KDTNode m_pTreeRoots[treeNodeSize]
// KDTNode values are written as raw bytes. The stream statements do not check the
// stream state; the visible return value is always true.
bool SaveTrees(void **pKDTMemFile, int64_t &len) const
inline std::uint64_t BufferSize() const
{
return sizeof(int) + sizeof(SizeType) * m_iTreeNumber +
sizeof(SizeType) + sizeof(KDTNode) * m_pTreeRoots.size();
}
bool SaveTrees(std::ostream& p_outstream) const
{
int treeNodeSize = (int)m_pTreeRoots.size();
size_t size = sizeof(int) +
sizeof(int) * m_iTreeNumber +
sizeof(int) +
sizeof(KDTNode) * treeNodeSize;
char *mem = (char*)malloc(size);
if (mem == NULL) return false;
auto ptr = mem;
*(int*)ptr = m_iTreeNumber;
ptr += sizeof(int);
memcpy(ptr, m_pTreeStart.data(), sizeof(int) * m_iTreeNumber);
ptr += sizeof(int) * m_iTreeNumber;
*(int*)ptr = treeNodeSize;
ptr += sizeof(int);
memcpy(ptr, m_pTreeRoots.data(), sizeof(KDTNode) * treeNodeSize);
// The malloc'ed buffer is returned through the out-parameter; nothing in this span frees it.
*pKDTMemFile = mem;
len = size;
// Stream-based statements: tree count, per-tree start offsets, node count, nodes.
p_outstream.write((char*)&m_iTreeNumber, sizeof(int));
p_outstream.write((char*)m_pTreeStart.data(), sizeof(SizeType) * m_iTreeNumber);
SizeType treeNodeSize = (SizeType)m_pTreeRoots.size();
p_outstream.write((char*)&treeNodeSize, sizeof(SizeType));
p_outstream.write((char*)m_pTreeRoots.data(), sizeof(KDTNode) * treeNodeSize);
std::cout << "Save KDT (" << m_iTreeNumber << "," << treeNodeSize << ") Finish!" << std::endl;
return true;
}
// Saves the KD-trees to the file sTreeFileName.
// NOTE(review): rendered diff without +/- markers - the FILE*/fwrite statements are one
// revision; the std::ofstream statements, which forward to SaveTrees(output), are the other.
// In the std::ofstream revision, false is returned only when the file cannot be opened.
bool SaveTrees(std::string sTreeFileName) const
{
std::cout << "Save KDT to " << sTreeFileName << std::endl;
FILE *fp = fopen(sTreeFileName.c_str(), "wb");
if (fp == NULL) return false;
// FILE*-based statements: tree count, per-tree start offsets, node count, raw nodes.
fwrite(&m_iTreeNumber, sizeof(int), 1, fp);
fwrite(m_pTreeStart.data(), sizeof(int), m_iTreeNumber, fp);
int treeNodeSize = (int)m_pTreeRoots.size();
fwrite(&treeNodeSize, sizeof(int), 1, fp);
fwrite(m_pTreeRoots.data(), sizeof(KDTNode), treeNodeSize, fp);
fclose(fp);
std::cout << "Save KDT (" << m_iTreeNumber << "," << treeNodeSize << ") Finish!" << std::endl;
std::ofstream output(sTreeFileName, std::ios::binary);
if (!output.is_open()) return false;
// NOTE(review): the bool returned by the stream overload is discarded here.
SaveTrees(output);
output.close();
return true;
}
@ -123,31 +106,32 @@ namespace SPTAG
m_iTreeNumber = *((int*)pKDTMemFile);
pKDTMemFile += sizeof(int);
m_pTreeStart.resize(m_iTreeNumber);
memcpy(m_pTreeStart.data(), pKDTMemFile, sizeof(int) * m_iTreeNumber);
pKDTMemFile += sizeof(int)*m_iTreeNumber;
memcpy(m_pTreeStart.data(), pKDTMemFile, sizeof(SizeType) * m_iTreeNumber);
pKDTMemFile += sizeof(SizeType)*m_iTreeNumber;
int treeNodeSize = *((int*)pKDTMemFile);
pKDTMemFile += sizeof(int);
SizeType treeNodeSize = *((SizeType*)pKDTMemFile);
pKDTMemFile += sizeof(SizeType);
m_pTreeRoots.resize(treeNodeSize);
memcpy(m_pTreeRoots.data(), pKDTMemFile, sizeof(KDTNode) * treeNodeSize);
std::cout << "Load KDT (" << m_iTreeNumber << "," << treeNodeSize << ") Finish!" << std::endl;
return true;
}
// Loads the KD-trees from the file sTreeFileName.
// NOTE(review): rendered diff without +/- markers - the FILE*/fread statements and the
// std::ifstream statements are two revisions of the same steps.
// Steps: read the tree count, size m_pTreeStart to it and read the per-tree start
// offsets, read the node count, size m_pTreeRoots to it and read the raw KDTNode array.
// Returns false only when the file cannot be opened; read results are not checked and
// the counts read from the file are used for resize() without validation.
bool LoadTrees(std::string sTreeFileName)
{
std::cout << "Load KDT From " << sTreeFileName << std::endl;
FILE *fp = fopen(sTreeFileName.c_str(), "rb");
if (fp == NULL) return false;
std::ifstream input(sTreeFileName, std::ios::binary);
if (!input.is_open()) return false;
fread(&m_iTreeNumber, sizeof(int), 1, fp);
input.read((char*)&m_iTreeNumber, sizeof(int));
m_pTreeStart.resize(m_iTreeNumber);
fread(m_pTreeStart.data(), sizeof(int), m_iTreeNumber, fp);
input.read((char*)m_pTreeStart.data(), sizeof(SizeType) * m_iTreeNumber);
int treeNodeSize;
fread(&treeNodeSize, sizeof(int), 1, fp);
SizeType treeNodeSize;
input.read((char*)&treeNodeSize, sizeof(SizeType));
m_pTreeRoots.resize(treeNodeSize);
fread(m_pTreeRoots.data(), sizeof(KDTNode), treeNodeSize, fp);
fclose(fp);
input.read((char*)m_pTreeRoots.data(), sizeof(KDTNode) * treeNodeSize);
input.close();
std::cout << "Load KDT (" << m_iTreeNumber << "," << treeNodeSize << ") Finish!" << std::endl;
return true;
}
@ -155,7 +139,7 @@ namespace SPTAG
template <typename T>
void InitSearchTrees(const VectorIndex* p_index, const COMMON::QueryResultSet<T> &p_query, COMMON::WorkSpace &p_space, const int p_limits) const
{
for (char i = 0; i < m_iTreeNumber; i++) {
for (int i = 0; i < m_iTreeNumber; i++) {
KDTSearch(p_index, p_query, p_space, m_pTreeStart[i], true, 0);
}
@ -181,10 +165,10 @@ namespace SPTAG
template <typename T>
void KDTSearch(const VectorIndex* p_index, const COMMON::QueryResultSet<T> &p_query,
COMMON::WorkSpace& p_space, const int node, const bool isInit, const float distBound) const {
COMMON::WorkSpace& p_space, const SizeType node, const bool isInit, const float distBound) const {
if (node < 0)
{
int index = -node - 1;
SizeType index = -node - 1;
if (index >= p_index->GetNumSamples()) return;
#ifdef PREFETCH
const char* data = (const char *)(p_index->GetSample(index));
@ -203,7 +187,7 @@ namespace SPTAG
float diff = (p_query.GetTarget())[tnode.split_dim] - tnode.split_value;
float distanceBound = distBound + diff * diff;
int otherChild, bestChild;
SizeType otherChild, bestChild;
if (diff < 0)
{
bestChild = tnode.left;
@ -224,10 +208,10 @@ namespace SPTAG
template <typename T>
void DivideTree(VectorIndex* p_index, std::vector<int>& indices, int first, int last,
int index, int &iTreeSize) {
void DivideTree(VectorIndex* p_index, std::vector<SizeType>& indices, SizeType first, SizeType last,
SizeType index, SizeType &iTreeSize) {
ChooseDivision<T>(p_index, m_pTreeRoots[index], indices, first, last);
int i = Subdivide<T>(p_index, m_pTreeRoots[index], indices, first, last);
SizeType i = Subdivide<T>(p_index, m_pTreeRoots[index], indices, first, last);
if (i - 1 <= first)
{
m_pTreeRoots[index].left = -indices[first] - 1;
@ -251,30 +235,30 @@ namespace SPTAG
}
template <typename T>
void ChooseDivision(VectorIndex* p_index, KDTNode& node, const std::vector<int>& indices, const int first, const int last)
void ChooseDivision(VectorIndex* p_index, KDTNode& node, const std::vector<SizeType>& indices, const SizeType first, const SizeType last)
{
std::vector<float> meanValues(p_index->GetFeatureDim(), 0);
std::vector<float> varianceValues(p_index->GetFeatureDim(), 0);
int end = min(first + m_iSamples, last);
int count = end - first + 1;
SizeType end = min(first + m_iSamples, last);
SizeType count = end - first + 1;
// calculate the mean of each dimension
for (int j = first; j <= end; j++)
for (SizeType j = first; j <= end; j++)
{
const T* v = (const T*)p_index->GetSample(indices[j]);
for (int k = 0; k < p_index->GetFeatureDim(); k++)
for (DimensionType k = 0; k < p_index->GetFeatureDim(); k++)
{
meanValues[k] += v[k];
}
}
for (int k = 0; k < p_index->GetFeatureDim(); k++)
for (DimensionType k = 0; k < p_index->GetFeatureDim(); k++)
{
meanValues[k] /= count;
}
// calculate the variance of each dimension
for (int j = first; j <= end; j++)
for (SizeType j = first; j <= end; j++)
{
const T* v = (const T*)p_index->GetSample(indices[j]);
for (int k = 0; k < p_index->GetFeatureDim(); k++)
for (DimensionType k = 0; k < p_index->GetFeatureDim(); k++)
{
float dist = v[k] - meanValues[k];
varianceValues[k] += dist*dist;
@ -286,13 +270,13 @@ namespace SPTAG
node.split_value = meanValues[node.split_dim];
}
int SelectDivisionDimension(const std::vector<float>& varianceValues) const
DimensionType SelectDivisionDimension(const std::vector<float>& varianceValues) const
{
// Record the top maximum variances
std::vector<int> topind(m_numTopDimensionKDTSplit);
std::vector<DimensionType> topind(m_numTopDimensionKDTSplit);
int num = 0;
// order the variances
for (int i = 0; i < varianceValues.size(); i++)
for (DimensionType i = 0; i < (DimensionType)varianceValues.size(); i++)
{
if (num < m_numTopDimensionKDTSplit || varianceValues[i] > varianceValues[topind[num - 1]])
{
@ -314,18 +298,18 @@ namespace SPTAG
}
}
// randomly choose a dimension from TOP_DIM
return topind[COMMON::Utils::rand_int(num)];
return topind[COMMON::Utils::rand(num)];
}
template <typename T>
int Subdivide(VectorIndex* p_index, const KDTNode& node, std::vector<int>& indices, const int first, const int last) const
SizeType Subdivide(VectorIndex* p_index, const KDTNode& node, std::vector<SizeType>& indices, const SizeType first, const SizeType last) const
{
int i = first;
int j = last;
SizeType i = first;
SizeType j = last;
// decide which child one point belongs
while (i <= j)
{
int ind = indices[i];
SizeType ind = indices[i];
const T* v = (const T*)p_index->GetSample(ind);
float val = v[node.split_dim];
if (val < node.split_value)
@ -347,7 +331,7 @@ namespace SPTAG
}
private:
std::vector<int> m_pTreeStart;
std::vector<SizeType> m_pTreeStart;
std::vector<KDTNode> m_pTreeRoots;
public:

View File

@ -27,18 +27,21 @@ namespace SPTAG
m_iCEFScale(2),
m_iRefineIter(0),
m_iCEF(1000),
m_iMaxCheckForRefineGraph(10000) {}
m_iMaxCheckForRefineGraph(10000)
{
m_pNeighborhoodGraph.SetName("Graph");
}
~NeighborhoodGraph() {}
virtual void InsertNeighbors(VectorIndex* index, const int node, int insertNode, float insertDist) = 0;
virtual void InsertNeighbors(VectorIndex* index, const SizeType node, SizeType insertNode, float insertDist) = 0;
virtual void RebuildNeighbors(VectorIndex* index, const int node, int* nodes, const BasicResult* queryResults, const int numResults) = 0;
virtual void RebuildNeighbors(VectorIndex* index, const SizeType node, SizeType* nodes, const BasicResult* queryResults, const int numResults) = 0;
virtual float GraphAccuracyEstimation(VectorIndex* index, const int samples, const std::unordered_map<int, int>* idmap = nullptr) = 0;
virtual float GraphAccuracyEstimation(VectorIndex* index, const SizeType samples, const std::unordered_map<SizeType, SizeType>* idmap = nullptr) = 0;
template <typename T>
void BuildGraph(VectorIndex* index, const std::unordered_map<int, int>* idmap = nullptr)
void BuildGraph(VectorIndex* index, const std::unordered_map<SizeType, SizeType>* idmap = nullptr)
{
std::cout << "build RNG graph!" << std::endl;
@ -55,11 +58,11 @@ namespace SPTAG
{
COMMON::Dataset<float> NeighborhoodDists(m_iGraphSize, m_iNeighborhoodSize);
std::vector<std::vector<int>> TptreeDataIndices(m_iTPTNumber, std::vector<int>(m_iGraphSize));
std::vector<std::vector<std::pair<int, int>>> TptreeLeafNodes(m_iTPTNumber, std::vector<std::pair<int, int>>());
std::vector<std::vector<SizeType>> TptreeDataIndices(m_iTPTNumber, std::vector<SizeType>(m_iGraphSize));
std::vector<std::vector<std::pair<SizeType, SizeType>>> TptreeLeafNodes(m_iTPTNumber, std::vector<std::pair<SizeType, SizeType>>());
for (int i = 0; i < m_iGraphSize; i++)
for (int j = 0; j < m_iNeighborhoodSize; j++)
for (SizeType i = 0; i < m_iGraphSize; i++)
for (DimensionType j = 0; j < m_iNeighborhoodSize; j++)
(NeighborhoodDists)[i][j] = MaxDist;
std::cout << "Parallel TpTree Partition begin " << std::endl;
@ -67,7 +70,7 @@ namespace SPTAG
for (int i = 0; i < m_iTPTNumber; i++)
{
Sleep(i * 100); std::srand(clock());
for (int j = 0; j < m_iGraphSize; j++) TptreeDataIndices[i][j] = j;
for (SizeType j = 0; j < m_iGraphSize; j++) TptreeDataIndices[i][j] = j;
std::random_shuffle(TptreeDataIndices[i].begin(), TptreeDataIndices[i].end());
PartitionByTptree<T>(index, TptreeDataIndices[i], 0, m_iGraphSize - 1, TptreeLeafNodes[i]);
std::cout << "Finish Getting Leaves for Tree " << i << std::endl;
@ -77,17 +80,17 @@ namespace SPTAG
for (int i = 0; i < m_iTPTNumber; i++)
{
#pragma omp parallel for schedule(dynamic)
for (int j = 0; j < TptreeLeafNodes[i].size(); j++)
for (SizeType j = 0; j < (SizeType)TptreeLeafNodes[i].size(); j++)
{
int start_index = TptreeLeafNodes[i][j].first;
int end_index = TptreeLeafNodes[i][j].second;
SizeType start_index = TptreeLeafNodes[i][j].first;
SizeType end_index = TptreeLeafNodes[i][j].second;
if (omp_get_thread_num() == 0) std::cout << "\rProcessing Tree " << i << ' ' << j * 100 / TptreeLeafNodes[i].size() << '%';
for (int x = start_index; x < end_index; x++)
for (SizeType x = start_index; x < end_index; x++)
{
for (int y = x + 1; y <= end_index; y++)
for (SizeType y = x + 1; y <= end_index; y++)
{
int p1 = TptreeDataIndices[i][x];
int p2 = TptreeDataIndices[i][y];
SizeType p1 = TptreeDataIndices[i][x];
SizeType p2 = TptreeDataIndices[i][y];
float dist = index->ComputeDistance(index->GetSample(p1), index->GetSample(p2));
if (idmap != nullptr) {
p1 = (idmap->find(p1) == idmap->end()) ? p1 : idmap->at(p1);
@ -112,13 +115,13 @@ namespace SPTAG
}
template <typename T>
void RefineGraph(VectorIndex* index, const std::unordered_map<int, int>* idmap = nullptr)
void RefineGraph(VectorIndex* index, const std::unordered_map<SizeType, SizeType>* idmap = nullptr)
{
m_iCEF *= m_iCEFScale;
m_iMaxCheckForRefineGraph *= m_iCEFScale;
#pragma omp parallel for schedule(dynamic)
for (int i = 0; i < m_iGraphSize; i++)
for (SizeType i = 0; i < m_iGraphSize; i++)
{
RefineNode<T>(index, i, false);
if (i % 1000 == 0) std::cout << "\rRefine 1 " << (i * 100 / m_iGraphSize) << "%";
@ -130,7 +133,7 @@ namespace SPTAG
m_iNeighborhoodSize /= m_iNeighborhoodScale;
#pragma omp parallel for schedule(dynamic)
for (int i = 0; i < m_iGraphSize; i++)
for (SizeType i = 0; i < m_iGraphSize; i++)
{
RefineNode<T>(index, i, false);
if (i % 1000 == 0) std::cout << "\rRefine 2 " << (i * 100 / m_iGraphSize) << "%";
@ -147,17 +150,17 @@ namespace SPTAG
}
template <typename T>
ErrorCode RefineGraph(VectorIndex* index, std::vector<int>& indices, std::vector<int>& reverseIndices,
std::string graphFileName, const std::unordered_map<int, int>* idmap = nullptr)
ErrorCode RefineGraph(VectorIndex* index, std::vector<SizeType>& indices, std::vector<SizeType>& reverseIndices,
std::ostream& output, const std::unordered_map<SizeType, SizeType>* idmap = nullptr)
{
int R = (int)indices.size();
SizeType R = (SizeType)indices.size();
#pragma omp parallel for schedule(dynamic)
for (int i = 0; i < R; i++)
for (SizeType i = 0; i < R; i++)
{
RefineNode<T>(index, indices[i], false);
int* nodes = m_pNeighborhoodGraph[indices[i]];
for (int j = 0; j < m_iNeighborhoodSize; j++)
SizeType* nodes = m_pNeighborhoodGraph[indices[i]];
for (DimensionType j = 0; j < m_iNeighborhoodSize; j++)
{
if (nodes[j] < 0) nodes[j] = -1;
else nodes[j] = reverseIndices[nodes[j]];
@ -166,20 +169,13 @@ namespace SPTAG
nodes[m_iNeighborhoodSize - 1] = -2 - idmap->at(-1 - indices[i]);
}
std::ofstream graphOut(graphFileName, std::ios::binary);
if (!graphOut.is_open()) return ErrorCode::FailedCreateFile;
graphOut.write((char*)&R, sizeof(int));
graphOut.write((char*)&m_iNeighborhoodSize, sizeof(int));
for (int i = 0; i < R; i++) {
graphOut.write((char*)m_pNeighborhoodGraph[indices[i]], sizeof(int) * m_iNeighborhoodSize);
}
graphOut.close();
m_pNeighborhoodGraph.Refine(indices, output);
return ErrorCode::Success;
}
template <typename T>
void RefineNode(VectorIndex* index, const int node, bool updateNeighbors)
void RefineNode(VectorIndex* index, const SizeType node, bool updateNeighbors)
{
COMMON::QueryResultSet<T> query((const T*)index->GetSample(node), m_iCEF + 1);
index->SearchIndex(query);
@ -200,8 +196,8 @@ namespace SPTAG
}
template <typename T>
void PartitionByTptree(VectorIndex* index, std::vector<int>& indices, const int first, const int last,
std::vector<std::pair<int, int>> & leaves)
void PartitionByTptree(VectorIndex* index, std::vector<SizeType>& indices, const SizeType first, const SizeType last,
std::vector<std::pair<SizeType, SizeType>> & leaves)
{
if (last - first <= m_iTPTLeafSize)
{
@ -212,39 +208,39 @@ namespace SPTAG
std::vector<float> Mean(index->GetFeatureDim(), 0);
int iIteration = 100;
int end = min(first + m_iSamples, last);
int count = end - first + 1;
SizeType end = min(first + m_iSamples, last);
SizeType count = end - first + 1;
// calculate the mean of each dimension
for (int j = first; j <= end; j++)
for (SizeType j = first; j <= end; j++)
{
const T* v = (const T*)index->GetSample(indices[j]);
for (int k = 0; k < index->GetFeatureDim(); k++)
for (DimensionType k = 0; k < index->GetFeatureDim(); k++)
{
Mean[k] += v[k];
}
}
for (int k = 0; k < index->GetFeatureDim(); k++)
for (DimensionType k = 0; k < index->GetFeatureDim(); k++)
{
Mean[k] /= count;
}
std::vector<BasicResult> Variance;
Variance.reserve(index->GetFeatureDim());
for (int j = 0; j < index->GetFeatureDim(); j++)
for (DimensionType j = 0; j < index->GetFeatureDim(); j++)
{
Variance.push_back(BasicResult(j, 0));
}
// calculate the variance of each dimension
for (int j = first; j <= end; j++)
for (SizeType j = first; j <= end; j++)
{
const T* v = (const T*)index->GetSample(indices[j]);
for (int k = 0; k < index->GetFeatureDim(); k++)
for (DimensionType k = 0; k < index->GetFeatureDim(); k++)
{
float dist = v[k] - Mean[k];
Variance[k].Dist += dist*dist;
}
}
std::sort(Variance.begin(), Variance.end(), COMMON::Compare);
std::vector<int> indexs(m_numTopDimensionTPTSplit);
std::vector<SizeType> indexs(m_numTopDimensionTPTSplit);
std::vector<float> weight(m_numTopDimensionTPTSplit), bestweight(m_numTopDimensionTPTSplit);
float bestvariance = Variance[index->GetFeatureDim() - 1].Dist;
for (int i = 0; i < m_numTopDimensionTPTSplit; i++)
@ -270,7 +266,7 @@ namespace SPTAG
weight[j] /= sumweight;
}
float mean = 0;
for (int j = 0; j < count; j++)
for (SizeType j = 0; j < count; j++)
{
Val[j] = 0;
const T* v = (const T*)index->GetSample(indices[first + j]);
@ -282,7 +278,7 @@ namespace SPTAG
}
mean /= count;
float var = 0;
for (int j = 0; j < count; j++)
for (SizeType j = 0; j < count; j++)
{
float dist = Val[j] - mean;
var += dist * dist;
@ -297,8 +293,8 @@ namespace SPTAG
}
}
}
int i = first;
int j = last;
SizeType i = first;
SizeType j = last;
// decide which child one point belongs
while (i <= j)
{
@ -336,100 +332,71 @@ namespace SPTAG
}
}
// Forwards to m_pNeighborhoodGraph.BufferSize().
// NOTE(review): presumably the byte count of the serialized graph - confirm against
// Dataset::BufferSize, which is not visible here.
inline std::uint64_t BufferSize() const
{
return m_pNeighborhoodGraph.BufferSize();
}
// Loads the neighborhood graph from the file sGraphFilename.
// NOTE(review): rendered diff without +/- markers - the FILE*/fread statements are one
// revision; the other delegates to m_pNeighborhoodGraph.Load(sGraphFilename).
// In the delegating revision, the graph size and neighborhood size are then taken from
// the loaded dataset's R() and C(), and m_dataUpdateLock is resized to the graph size.
bool LoadGraph(std::string sGraphFilename)
{
std::cout << "Load Graph From " << sGraphFilename << std::endl;
FILE * fp = fopen(sGraphFilename.c_str(), "rb");
if (fp == NULL) return false;
if (!m_pNeighborhoodGraph.Load(sGraphFilename)) return false;
// FILE*-based statements: header is two ints (graph size, neighborhood size).
fread(&m_iGraphSize, sizeof(int), 1, fp);
fread(&m_iNeighborhoodSize, sizeof(int), 1, fp);
m_pNeighborhoodGraph.Initialize(m_iGraphSize, m_iNeighborhoodSize);
m_iGraphSize = m_pNeighborhoodGraph.R();
m_iNeighborhoodSize = m_pNeighborhoodGraph.C();
m_dataUpdateLock.resize(m_iGraphSize);
// FILE*-based statements: one fread per graph row.
for (int i = 0; i < m_iGraphSize; i++)
{
fread((m_pNeighborhoodGraph)[i], sizeof(int), m_iNeighborhoodSize, fp);
}
fclose(fp);
std::cout << "Load Graph (" << m_iGraphSize << "," << m_iNeighborhoodSize << ") Finish!" << std::endl;
return true;
}
// Loads the neighborhood graph from an in-memory image.
// NOTE(review): rendered diff without +/- markers - LoadGraphFromMemory, with its manual
// header parsing, and LoadGraph(char*), which delegates to m_pNeighborhoodGraph.Load,
// are two revisions of the same function.
// After loading, the graph size and neighborhood size are synchronized and
// m_dataUpdateLock is resized to the graph size. Always returns true.
bool LoadGraphFromMemory(char* pGraphMemFile)
bool LoadGraph(char* pGraphMemFile)
{
m_iGraphSize = *((int*)pGraphMemFile);
pGraphMemFile += sizeof(int);
// NOTE(review): the bool returned by Load is not checked here.
m_pNeighborhoodGraph.Load(pGraphMemFile);
m_iNeighborhoodSize = *((int*)pGraphMemFile);
pGraphMemFile += sizeof(int);
m_pNeighborhoodGraph.Initialize(m_iGraphSize, m_iNeighborhoodSize, (int*)pGraphMemFile);
m_iGraphSize = m_pNeighborhoodGraph.R();
m_iNeighborhoodSize = m_pNeighborhoodGraph.C();
m_dataUpdateLock.resize(m_iGraphSize);
return true;
}
// Saves the neighborhood graph to the file sGraphFilename.
// NOTE(review): rendered diff without +/- markers - the FILE*/fwrite statements (two-int
// header followed by one fwrite per row) are one revision; the other is the single
// statement that returns m_pNeighborhoodGraph.Save(sGraphFilename).
bool SaveGraph(std::string sGraphFilename) const
{
std::cout << "Save Graph To " << sGraphFilename << std::endl;
FILE *fp = fopen(sGraphFilename.c_str(), "wb");
if (fp == NULL) return false;
fwrite(&m_iGraphSize, sizeof(int), 1, fp);
fwrite(&m_iNeighborhoodSize, sizeof(int), 1, fp);
for (int i = 0; i < m_iGraphSize; i++)
{
fwrite((m_pNeighborhoodGraph)[i], sizeof(int), m_iNeighborhoodSize, fp);
}
fclose(fp);
std::cout << "Save Graph (" << m_iGraphSize << "," << m_iNeighborhoodSize << ") Finish!" << std::endl;
return true;
return m_pNeighborhoodGraph.Save(sGraphFilename);
}
// NOTE(review): rendered diff without +/- markers. Two definitions are interleaved here:
//   - SaveGraphToMemory(void**, int64_t&): mallocs a buffer, writes a two-int header
//     (graph size, neighborhood size) and copies every graph row after it, then returns
//     the buffer and its size through the out-parameters;
//   - SaveGraph(std::ostream&) const: forwards to m_pNeighborhoodGraph.Save(output).
bool SaveGraphToMemory(void **pGraphMemFile, int64_t &len) {
size_t size = sizeof(int) + sizeof(int) + sizeof(int) * m_iNeighborhoodSize * m_iGraphSize;
char *mem = (char*)malloc(size);
if (mem == NULL) return false;
auto ptr = mem;
*(int*)ptr = m_iGraphSize;
ptr += sizeof(int);
*(int*)ptr = m_iNeighborhoodSize;
ptr += sizeof(int);
// Rows are copied one at a time, m_iNeighborhoodSize ints per row.
for (int i = 0; i < m_iGraphSize; i++)
{
memcpy(ptr, (m_pNeighborhoodGraph)[i], sizeof(int) * m_iNeighborhoodSize);
ptr += sizeof(int) * m_iNeighborhoodSize;
}
// The malloc'ed buffer is returned through the out-parameter; nothing in this span frees it.
*pGraphMemFile = mem;
len = size;
return true;
bool SaveGraph(std::ostream& output) const
{
return m_pNeighborhoodGraph.Save(output);
}
// Appends `num` rows to the graph.
// NOTE(review): rendered diff without +/- markers - the one-line void AddBatch(int) is
// the old revision, and the stray `inline int* operator[](int index)` line inside the
// body is a removed line from the old revision, not part of AddBatch.
// ErrorCode revision: if m_pNeighborhoodGraph.AddBatch(num) fails, its error code is
// returned and neither m_iGraphSize nor m_dataUpdateLock is touched; otherwise the graph
// size grows by num and the lock table is resized to match.
inline void AddBatch(int num) { m_pNeighborhoodGraph.AddBatch(num); m_iGraphSize += num; m_dataUpdateLock.resize(m_iGraphSize); }
inline ErrorCode AddBatch(SizeType num)
{
ErrorCode ret = m_pNeighborhoodGraph.AddBatch(num);
if (ret != ErrorCode::Success) return ret;
inline int* operator[](int index) { return m_pNeighborhoodGraph[index]; }
m_iGraphSize += num;
m_dataUpdateLock.resize(m_iGraphSize);
return ErrorCode::Success;
}
// Row accessors and size helpers.
// NOTE(review): rendered diff without +/- markers - int-typed lines are the old
// revision, SizeType-typed lines the new one.
//  - operator[] returns the row pointer that m_pNeighborhoodGraph yields for `index`.
//  - SetR sets the row count on the dataset, mirrors it into m_iGraphSize and resizes
//    m_dataUpdateLock to match.
//  - R returns m_iGraphSize.
inline const int* operator[](int index) const { return m_pNeighborhoodGraph[index]; }
inline SizeType* operator[](SizeType index) { return m_pNeighborhoodGraph[index]; }
inline void SetR(int rows) { m_pNeighborhoodGraph.SetR(rows); m_iGraphSize = rows; m_dataUpdateLock.resize(m_iGraphSize); }
inline const SizeType* operator[](SizeType index) const { return m_pNeighborhoodGraph[index]; }
inline int R() const { return m_iGraphSize; }
inline void SetR(SizeType rows) { m_pNeighborhoodGraph.SetR(rows); m_iGraphSize = rows; m_dataUpdateLock.resize(m_iGraphSize); }
inline SizeType R() const { return m_iGraphSize; }
static std::shared_ptr<NeighborhoodGraph> CreateInstance(std::string type);
protected:
// Graph structure
int m_iGraphSize;
COMMON::Dataset<int> m_pNeighborhoodGraph;
SizeType m_iGraphSize;
COMMON::Dataset<SizeType> m_pNeighborhoodGraph;
COMMON::FineGrainedLock m_dataUpdateLock; // protect one row of the graph
public:
int m_iTPTNumber, m_iTPTLeafSize, m_iSamples, m_numTopDimensionTPTSplit;
int m_iNeighborhoodSize, m_iNeighborhoodScale, m_iCEFScale, m_iRefineIter, m_iCEF, m_iMaxCheckForRefineGraph;
DimensionType m_iNeighborhoodSize;
int m_iNeighborhoodScale, m_iCEFScale, m_iRefineIter, m_iCEF, m_iMaxCheckForRefineGraph;
};
}
}

View File

@ -51,7 +51,7 @@ public:
return m_results[0].Dist;
}
bool AddPoint(const int index, float dist)
bool AddPoint(const SizeType index, float dist)
{
if (dist < m_results[0].Dist || (dist == m_results[0].Dist && index < m_results[0].VID))
{

View File

@ -13,15 +13,15 @@ namespace SPTAG
class RelativeNeighborhoodGraph: public NeighborhoodGraph
{
public:
void RebuildNeighbors(VectorIndex* index, const int node, int* nodes, const BasicResult* queryResults, const int numResults) {
int count = 0;
void RebuildNeighbors(VectorIndex* index, const SizeType node, SizeType* nodes, const BasicResult* queryResults, const int numResults) {
DimensionType count = 0;
for (int j = 0; j < numResults && count < m_iNeighborhoodSize; j++) {
const BasicResult& item = queryResults[j];
if (item.VID < 0) break;
if (item.VID == node) continue;
bool good = true;
for (int k = 0; k < count; k++) {
for (DimensionType k = 0; k < count; k++) {
if (index->ComputeDistance(index->GetSample(nodes[k]), index->GetSample(item.VID)) <= item.Dist) {
good = false;
break;
@ -29,21 +29,21 @@ namespace SPTAG
}
if (good) nodes[count++] = item.VID;
}
for (int j = count; j < m_iNeighborhoodSize; j++) nodes[j] = -1;
for (DimensionType j = count; j < m_iNeighborhoodSize; j++) nodes[j] = -1;
}
void InsertNeighbors(VectorIndex* index, const int node, int insertNode, float insertDist)
void InsertNeighbors(VectorIndex* index, const SizeType node, SizeType insertNode, float insertDist)
{
int* nodes = m_pNeighborhoodGraph[node];
for (int k = 0; k < m_iNeighborhoodSize; k++)
SizeType* nodes = m_pNeighborhoodGraph[node];
for (DimensionType k = 0; k < m_iNeighborhoodSize; k++)
{
int tmpNode = nodes[k];
SizeType tmpNode = nodes[k];
if (tmpNode < -1) continue;
if (tmpNode < 0)
{
bool good = true;
for (int t = 0; t < k; t++) {
for (DimensionType t = 0; t < k; t++) {
if (index->ComputeDistance(index->GetSample(insertNode), index->GetSample(nodes[t])) < insertDist) {
good = false;
break;
@ -58,7 +58,7 @@ namespace SPTAG
if (insertDist < tmpDist || (insertDist == tmpDist && insertNode < tmpNode))
{
bool good = true;
for (int t = 0; t < k; t++) {
for (DimensionType t = 0; t < k; t++) {
if (index->ComputeDistance(index->GetSample(insertNode), index->GetSample(nodes[t])) < insertDist) {
good = false;
break;
@ -76,33 +76,33 @@ namespace SPTAG
}
}
float GraphAccuracyEstimation(VectorIndex* index, const int samples, const std::unordered_map<int, int>* idmap = nullptr)
float GraphAccuracyEstimation(VectorIndex* index, const SizeType samples, const std::unordered_map<SizeType, SizeType>* idmap = nullptr)
{
int* correct = new int[samples];
DimensionType* correct = new DimensionType[samples];
#pragma omp parallel for schedule(dynamic)
for (int i = 0; i < samples; i++)
for (SizeType i = 0; i < samples; i++)
{
int x = COMMON::Utils::rand_int(m_iGraphSize);
SizeType x = COMMON::Utils::rand(m_iGraphSize);
//int x = i;
COMMON::QueryResultSet<void> query(nullptr, m_iCEF);
for (int y = 0; y < m_iGraphSize; y++)
for (SizeType y = 0; y < m_iGraphSize; y++)
{
if ((idmap != nullptr && idmap->find(y) != idmap->end())) continue;
float dist = index->ComputeDistance(index->GetSample(x), index->GetSample(y));
query.AddPoint(y, dist);
}
query.SortResult();
int * exact_rng = new int[m_iNeighborhoodSize];
SizeType * exact_rng = new SizeType[m_iNeighborhoodSize];
RebuildNeighbors(index, x, exact_rng, query.GetResults(), m_iCEF);
correct[i] = 0;
for (int j = 0; j < m_iNeighborhoodSize; j++) {
for (DimensionType j = 0; j < m_iNeighborhoodSize; j++) {
if (exact_rng[j] == -1) {
correct[i] += m_iNeighborhoodSize - j;
break;
}
for (int k = 0; k < m_iNeighborhoodSize; k++)
for (DimensionType k = 0; k < m_iNeighborhoodSize; k++)
if ((m_pNeighborhoodGraph)[x][k] == exact_rng[j]) {
correct[i]++;
break;
@ -111,7 +111,7 @@ namespace SPTAG
delete[] exact_rng;
}
float acc = 0;
for (int i = 0; i < samples; i++) acc += float(correct[i]);
for (SizeType i = 0; i < samples; i++) acc += float(correct[i]);
acc = acc / samples / m_iNeighborhoodSize;
delete[] correct;
return acc;

View File

@ -14,10 +14,10 @@ namespace SPTAG
// node type in the priority queue
struct HeapCell
{
int node;
SizeType node;
float distance;
HeapCell(int _node = -1, float _distance = MaxDist) : node(_node), distance(_distance) {}
HeapCell(SizeType _node = -1, float _distance = MaxDist) : node(_node), distance(_distance) {}
inline bool operator < (const HeapCell& rhs)
{
@ -45,12 +45,12 @@ namespace SPTAG
// Record 2 hash tables.
// [0~m_poolSize + 1) is the first block.
// [m_poolSize + 1, 2*(m_poolSize + 1)) is the second block;
int m_hashTable[(m_poolSize + 1) * 2];
SizeType m_hashTable[(m_poolSize + 1) * 2];
inline unsigned hash_func2(int idx, int loop)
inline unsigned hash_func2(unsigned idx, int loop)
{
return ((unsigned)idx + loop) & m_poolSize;
return (idx + loop) & m_poolSize;
}
@ -65,7 +65,7 @@ namespace SPTAG
~OptHashPosVector() {}
void Init(int size)
void Init(SizeType size)
{
m_secondHash = true;
clear();
@ -76,31 +76,31 @@ namespace SPTAG
if (!m_secondHash)
{
// Clear first block.
memset(&m_hashTable[0], 0, sizeof(int)*(m_poolSize + 1));
memset(&m_hashTable[0], 0, sizeof(SizeType)*(m_poolSize + 1));
}
else
{
// Clear all blocks.
memset(&m_hashTable[0], 0, 2 * sizeof(int) * (m_poolSize + 1));
memset(&m_hashTable[0], 0, 2 * sizeof(SizeType) * (m_poolSize + 1));
m_secondHash = false;
}
}
inline bool CheckAndSet(int idx)
inline bool CheckAndSet(SizeType idx)
{
// Inner Index is begin from 1
return _CheckAndSet(&m_hashTable[0], idx + 1) == 0;
}
inline int _CheckAndSet(int* hashTable, int idx)
inline int _CheckAndSet(SizeType* hashTable, SizeType idx)
{
unsigned index, loop;
unsigned index;
// Get first hash position.
index = hash_func(idx);
for (loop = 0; loop < m_maxLoop; ++loop)
index = hash_func((unsigned)idx);
for (int loop = 0; loop < m_maxLoop; ++loop)
{
if (!hashTable[index])
{
@ -132,7 +132,7 @@ namespace SPTAG
// Variables for each single NN search
struct WorkSpace
{
void Initialize(int maxCheck, int dataSize)
void Initialize(int maxCheck, SizeType dataSize)
{
nodeCheckStatus.Init(dataSize);
m_SPTQueue.Resize(maxCheck * 10);
@ -158,7 +158,7 @@ namespace SPTAG
m_iNumOfContinuousNoBetterPropagation = 0;
}
inline bool CheckAndSet(int idx)
inline bool CheckAndSet(SizeType idx)
{
return nodeCheckStatus.CheckAndSet(idx);
}

View File

@ -17,7 +17,7 @@ namespace COMMON
class WorkSpacePool
{
public:
WorkSpacePool(int p_maxCheck, int p_vectorCount);
WorkSpacePool(int p_maxCheck, SizeType p_vectorCount);
virtual ~WorkSpacePool();
@ -34,7 +34,7 @@ private:
int m_maxCheck;
int m_vectorCount;
SizeType m_vectorCount;
};
}

View File

@ -4,53 +4,223 @@
#ifndef _SPTAG_COMMONDATASTRUCTURE_H_
#define _SPTAG_COMMONDATASTRUCTURE_H_
#include "Common.h"
#include "inc/Core/Common.h"
namespace SPTAG
{
class ByteArray
template<typename T>
class Array
{
public:
ByteArray();
Array();
ByteArray(ByteArray&& p_right);
ByteArray(std::uint8_t* p_array, std::size_t p_length, bool p_transferOnwership);
ByteArray(std::uint8_t* p_array, std::size_t p_length, std::shared_ptr<std::uint8_t> p_dataHolder);
ByteArray(const ByteArray& p_right);
ByteArray& operator= (const ByteArray& p_right);
ByteArray& operator= (ByteArray&& p_right);
~ByteArray();
static ByteArray Alloc(std::size_t p_length);
std::uint8_t* Data() const;
std::size_t Length() const;
Array(T* p_array, std::size_t p_length, bool p_transferOwnership);
void SetData(std::uint8_t* p_array, std::size_t p_length);
Array(T* p_array, std::size_t p_length, std::shared_ptr<T> p_dataHolder);
std::shared_ptr<std::uint8_t> DataHolder() const;
Array(Array<T>&& p_right);
Array(const Array<T>& p_right);
Array<T>& operator= (Array<T>&& p_right);
Array<T>& operator= (const Array<T>& p_right);
T& operator[] (std::size_t p_index);
const T& operator[] (std::size_t p_index) const;
~Array();
T* Data() const;
std::size_t Length() const;
std::shared_ptr<T> DataHolder() const;
void Set(T* p_array, std::size_t p_length, bool p_transferOwnership);
void Clear();
const static ByteArray c_empty;
static Array<T> Alloc(std::size_t p_length);
const static Array<T> c_empty;
private:
std::uint8_t* m_data;
T* m_data;
std::size_t m_length;
// Notice this is holding an array. Set correct deleter for this.
std::shared_ptr<std::uint8_t> m_dataHolder;
std::shared_ptr<T> m_dataHolder;
};
// Out-of-class definition of the static c_empty member. It is default-constructed,
// so it has a null data pointer, zero length and no data holder.
template<typename T>
const Array<T> Array<T>::c_empty;
// Default constructor: builds an empty array (null data pointer, zero length,
// no ownership holder).
template <typename T>
Array<T>::Array()
    : m_data(nullptr), m_length(0), m_dataHolder()
{
}
// Wraps an existing buffer of p_length elements starting at p_array.
//
// p_transferOwnership == true : the array installs a holder that releases the
//                               buffer with delete[] (std::default_delete<T[]>),
//                               so p_array must come from new T[].
// p_transferOwnership == false: no holder is installed; the array only records
//                               the pointer and length.
template<typename T>
Array<T>::Array(T* p_array, std::size_t p_length, bool p_transferOwnership)
    : m_data(p_array),
      m_length(p_length)
{
    if (p_transferOwnership)
    {
        m_dataHolder.reset(m_data, std::default_delete<T[]>());
    }
}
// Wraps p_length elements starting at p_array and adopts the supplied holder.
// The holder is taken by value and moved into the member.
template <typename T>
Array<T>::Array(T* p_array, std::size_t p_length, std::shared_ptr<T> p_dataHolder)
    : m_data(p_array), m_length(p_length), m_dataHolder(std::move(p_dataHolder))
{
}
// Move constructor: takes over p_right's pointer, length and data holder.
//
// p_right is left empty (null pointer, zero length). Without this reset the
// moved-from object would keep a raw pointer into a buffer whose holder it
// just gave away, i.e. a pointer it can no longer keep alive.
template<typename T>
Array<T>::Array(Array<T>&& p_right)
    : m_data(p_right.m_data),
      m_length(p_right.m_length),
      m_dataHolder(std::move(p_right.m_dataHolder))
{
    p_right.m_data = nullptr;
    p_right.m_length = 0;
}
// Copy constructor: a shallow copy. Both arrays refer to the same buffer and
// share the same data holder; no elements are duplicated.
template <typename T>
Array<T>::Array(const Array<T>& p_right)
    : m_data(p_right.m_data), m_length(p_right.m_length), m_dataHolder(p_right.m_dataHolder)
{
}
// Move assignment: takes over p_right's pointer, length and data holder, and
// leaves p_right empty so it does not keep a raw pointer to a buffer it no
// longer holds. Self-assignment is a no-op.
template<typename T>
Array<T>&
Array<T>::operator= (Array<T>&& p_right)
{
    if (this != &p_right)
    {
        m_data = p_right.m_data;
        m_length = p_right.m_length;
        m_dataHolder = std::move(p_right.m_dataHolder);

        // Reset the source only after its state has been taken over.
        p_right.m_data = nullptr;
        p_right.m_length = 0;
    }

    return *this;
}
// Copy assignment: a shallow copy. After the call both arrays refer to the
// same buffer and share the same data holder.
template <typename T>
Array<T>& Array<T>::operator= (const Array<T>& p_right)
{
    m_dataHolder = p_right.m_dataHolder;
    m_length = p_right.m_length;
    m_data = p_right.m_data;
    return *this;
}
// Mutable element access. The index is not range-checked.
template <typename T>
T& Array<T>::operator[] (std::size_t p_index)
{
    return *(m_data + p_index);
}
// Read-only element access. The index is not range-checked.
template <typename T>
const T& Array<T>::operator[] (std::size_t p_index) const
{
    return *(m_data + p_index);
}
// Destructor: no explicit work. Any buffer release happens through the
// m_dataHolder member's own destructor.
template <typename T>
Array<T>::~Array() {}
// Returns the raw pointer to the first element (nullptr for an empty array).
template <typename T>
T* Array<T>::Data() const
{
    return m_data;
}
// Returns the stored length, counted in elements of T.
template <typename T>
std::size_t Array<T>::Length() const
{
    return m_length;
}
// Returns a copy of the shared holder. It is empty when no holder was
// installed for the buffer.
template <typename T>
std::shared_ptr<T> Array<T>::DataHolder() const
{
    return m_dataHolder;
}
// Points the array at a new buffer of p_length elements.
//
// With p_transferOwnership == true a new holder is installed that frees
// p_array with delete[] (std::default_delete<T[]>).
template <typename T>
void Array<T>::Set(T* p_array, std::size_t p_length, bool p_transferOwnership)
{
    m_length = p_length;
    m_data = p_array;

    if (!p_transferOwnership)
    {
        // NOTE(review): any holder installed earlier is left in place here, so a
        // previously owned buffer stays alive and DataHolder() keeps returning it.
        // Confirm against callers whether that is intended.
        return;
    }

    m_dataHolder.reset(p_array, std::default_delete<T[]>());
}
// Returns the array to the empty state: drops this array's reference to the
// holder, then clears the pointer and the length.
template <typename T>
void Array<T>::Clear()
{
    m_dataHolder.reset();
    m_length = 0;
    m_data = nullptr;
}
// Factory: allocates a new buffer of p_length elements with new T[] and
// returns an array that owns it (released with delete[]).
// A zero length yields an empty array without allocating.
template <typename T>
Array<T> Array<T>::Alloc(std::size_t p_length)
{
    Array<T> result;

    if (p_length != 0)
    {
        result.m_dataHolder.reset(new T[p_length], std::default_delete<T[]>());
        result.m_data = result.m_dataHolder.get();
        result.m_length = p_length;
    }

    return result;
}
typedef Array<std::uint8_t> ByteArray;
} // namespace SPTAG
#endif // _SPTAG_COMMONDATASTRUCTURE_H_

View File

@ -28,6 +28,8 @@ DefineErrorCode(FailedOpenFile, 0x0002)
DefineErrorCode(FailedCreateFile, 0x0003)
DefineErrorCode(ParamNotFound, 0x0010)
DefineErrorCode(FailedParseValue, 0x0011)
DefineErrorCode(MemoryOverFlow, 0x0012)
DefineErrorCode(LackOfInputs, 0x0013)
// 0x1000 ~ 0x1FFF Index Build Status

View File

@ -15,12 +15,12 @@
#include "../Common/WorkSpacePool.h"
#include "../Common/RelativeNeighborhoodGraph.h"
#include "../Common/KDTree.h"
#include "inc/Helper/ConcurrentSet.h"
#include "inc/Helper/StringConvert.h"
#include "inc/Helper/SimpleIniReader.h"
#include <functional>
#include <mutex>
#include <tbb/concurrent_unordered_set.h>
namespace SPTAG
{
@ -48,14 +48,16 @@ namespace SPTAG
std::string m_sKDTFilename;
std::string m_sGraphFilename;
std::string m_sDataPointsFilename;
std::string m_sDeleteDataPointsFilename;
std::mutex m_dataLock; // protect data and graph
tbb::concurrent_unordered_set<int> m_deletedID;
std::mutex m_dataAddLock; // protect data and graph
Helper::Concurrent::ConcurrentSet<SizeType> m_deletedID;
float m_fDeletePercentageForRefine;
std::unique_ptr<COMMON::WorkSpacePool> m_workSpacePool;
int m_iNumberOfThreads;
DistCalcMethod m_iDistCalcMethod;
float(*m_fComputeDistance)(const T* pX, const T* pY, int length);
float(*m_fComputeDistance)(const T* pX, const T* pY, DimensionType length);
int m_iMaxCheck;
int m_iThresholdOfNumberOfContinuousNoBetterPropagation;
@ -63,20 +65,21 @@ namespace SPTAG
int m_iNumberOfOtherDynamicPivots;
public:
Index()
{
{
#define DefineKDTParameter(VarName, VarType, DefaultValue, RepresentStr) \
VarName = DefaultValue; \
#include "inc/Core/KDT/ParameterDefinitionList.h"
#undef DefineKDTParameter
m_fComputeDistance = COMMON::DistanceCalcSelector<T>(m_iDistCalcMethod);
}
m_pSamples.SetName("Vector");
m_fComputeDistance = COMMON::DistanceCalcSelector<T>(m_iDistCalcMethod);
}
~Index() {}
inline int GetNumSamples() const { return m_pSamples.R(); }
inline int GetFeatureDim() const { return m_pSamples.C(); }
inline SizeType GetNumSamples() const { return m_pSamples.R(); }
inline DimensionType GetFeatureDim() const { return m_pSamples.C(); }
inline int GetCurrMaxCheck() const { return m_iMaxCheck; }
inline int GetNumThreads() const { return m_iNumberOfThreads; }
@ -85,25 +88,41 @@ namespace SPTAG
inline VectorValueType GetVectorValueType() const { return GetEnumValueType<T>(); }
inline float ComputeDistance(const void* pX, const void* pY) const { return m_fComputeDistance((const T*)pX, (const T*)pY, m_pSamples.C()); }
inline const void* GetSample(const int idx) const { return (void*)m_pSamples[idx]; }
inline const void* GetSample(const SizeType idx) const { return (void*)m_pSamples[idx]; }
inline bool ContainSample(const SizeType idx) const { return !m_deletedID.contains(idx); }
inline bool NeedRefine() const { return m_deletedID.size() >= (size_t)(GetNumSamples() * m_fDeletePercentageForRefine); }
std::shared_ptr<std::vector<std::uint64_t>> BufferSize() const
{
std::shared_ptr<std::vector<std::uint64_t>> buffersize(new std::vector<std::uint64_t>);
buffersize->push_back(m_pSamples.BufferSize());
buffersize->push_back(m_pTrees.BufferSize());
buffersize->push_back(m_pGraph.BufferSize());
buffersize->push_back(m_deletedID.bufferSize());
return std::move(buffersize);
}
ErrorCode BuildIndex(const void* p_data, int p_vectorNum, int p_dimension);
ErrorCode SaveConfig(std::ostream& p_configout) const;
ErrorCode SaveIndexData(const std::string& p_folderPath);
ErrorCode SaveIndexData(const std::vector<std::ostream*>& p_indexStreams);
ErrorCode SaveIndexToMemory(std::vector<void*>& p_indexBlobs, std::vector<int64_t>& p_indexBlobsLen);
ErrorCode LoadIndexFromMemory(const std::vector<void*>& p_indexBlobs);
ErrorCode LoadConfig(Helper::IniReader& p_reader);
ErrorCode LoadIndexData(const std::string& p_folderPath);
ErrorCode LoadIndexDataFromMemory(const std::vector<ByteArray>& p_indexBlobs);
ErrorCode SaveIndex(const std::string& p_folderPath, std::ofstream& p_configout);
ErrorCode LoadIndex(const std::string& p_folderPath, Helper::IniReader& p_reader);
ErrorCode BuildIndex(const void* p_data, SizeType p_vectorNum, DimensionType p_dimension);
ErrorCode SearchIndex(QueryResult &p_query) const;
ErrorCode AddIndex(const void* p_vectors, int p_vectorNum, int p_dimension);
ErrorCode DeleteIndex(const void* p_vectors, int p_vectorNum);
ErrorCode AddIndex(const void* p_vectors, SizeType p_vectorNum, DimensionType p_dimension, SizeType* p_start = nullptr);
ErrorCode DeleteIndex(const void* p_vectors, SizeType p_vectorNum);
ErrorCode DeleteIndex(const SizeType& p_id);
ErrorCode SetParameter(const char* p_param, const char* p_value);
std::string GetParameter(const char* p_param) const;
private:
ErrorCode RefineIndex(const std::string& p_folderPath);
void SearchIndexWithDeleted(COMMON::QueryResultSet<T> &p_query, COMMON::WorkSpace &p_space, const tbb::concurrent_unordered_set<int> &p_deleted) const;
ErrorCode RefineIndex(const std::vector<std::ostream*>& p_indexStreams);
private:
void SearchIndexWithDeleted(COMMON::QueryResultSet<T> &p_query, COMMON::WorkSpace &p_space, const Helper::Concurrent::ConcurrentSet<SizeType> &p_deleted) const;
void SearchIndexWithoutDeleted(COMMON::QueryResultSet<T> &p_query, COMMON::WorkSpace &p_space) const;
};
} // namespace KDT

View File

@ -7,16 +7,17 @@
DefineKDTParameter(m_sKDTFilename, std::string, std::string("tree.bin"), "TreeFilePath")
DefineKDTParameter(m_sGraphFilename, std::string, std::string("graph.bin"), "GraphFilePath")
DefineKDTParameter(m_sDataPointsFilename, std::string, std::string("vectors.bin"), "VectorFilePath")
DefineKDTParameter(m_sDeleteDataPointsFilename, std::string, std::string("deletes.bin"), "DeleteVectorFilePath")
DefineKDTParameter(m_pTrees.m_iTreeNumber, int, 1L, "KDTNumber")
DefineKDTParameter(m_pTrees.m_numTopDimensionKDTSplit, int, 5L, "NumTopDimensionKDTSplit")
DefineKDTParameter(m_pTrees.m_iSamples, int, 100L, "NumSamplesKDTSplitConsideration")
DefineKDTParameter(m_pTrees.m_iSamples, int, 100L, "Samples")
DefineKDTParameter(m_pGraph.m_iTPTNumber, int, 32L, "TPTNumber")
DefineKDTParameter(m_pGraph.m_iTPTLeafSize, int, 2000L, "TPTLeafSize")
DefineKDTParameter(m_pGraph.m_numTopDimensionTPTSplit, int, 5L, "NumTopDimensionTPTSplit")
DefineKDTParameter(m_pGraph.m_iNeighborhoodSize, int, 32L, "NeighborhoodSize")
DefineKDTParameter(m_pGraph.m_iNeighborhoodSize, DimensionType, 32L, "NeighborhoodSize")
DefineKDTParameter(m_pGraph.m_iNeighborhoodScale, int, 2L, "GraphNeighborhoodScale")
DefineKDTParameter(m_pGraph.m_iCEFScale, int, 2L, "GraphCEFScale")
DefineKDTParameter(m_pGraph.m_iRefineIter, int, 0L, "RefineIterations")
@ -26,6 +27,7 @@ DefineKDTParameter(m_pGraph.m_iMaxCheckForRefineGraph, int, 10000L, "MaxCheckFor
DefineKDTParameter(m_iNumberOfThreads, int, 1L, "NumberOfThreads")
DefineKDTParameter(m_iDistCalcMethod, SPTAG::DistCalcMethod, SPTAG::DistCalcMethod::Cosine, "DistCalcMethod")
DefineKDTParameter(m_fDeletePercentageForRefine, float, 0.4F, "DeletePercentageForRefine")
DefineKDTParameter(m_iMaxCheck, int, 8192L, "MaxCheck")
DefineKDTParameter(m_iThresholdOfNumberOfContinuousNoBetterPropagation, int, 3L, "ThresholdOfNumberOfContinuousNoBetterPropagation")
DefineKDTParameter(m_iNumberOfInitialDynamicPivots, int, 50L, "NumberOfInitialDynamicPivots")

View File

@ -19,23 +19,23 @@ public:
virtual ~MetadataSet();
virtual ByteArray GetMetadata(IndexType p_vectorID) const = 0;
virtual ByteArray GetMetadata(SizeType p_vectorID) const = 0;
virtual SizeType Count() const = 0;
virtual bool Available() const = 0;
virtual std::pair<std::uint64_t, std::uint64_t> BufferSize() const = 0;
virtual void AddBatch(MetadataSet& data) = 0;
virtual ErrorCode SaveMetadata(std::ostream& p_metaOut, std::ostream& p_metaIndexOut) = 0;
virtual ErrorCode SaveMetadata(const std::string& p_metaFile, const std::string& p_metaindexFile) = 0;
virtual ErrorCode SaveMetadataToMemory(void **pGraphMemFile, int64_t &len) = 0;
virtual ErrorCode RefineMetadata(std::vector<SizeType>& indices, std::ostream& p_metaOut, std::ostream& p_metaIndexOut);
virtual ErrorCode LoadMetadataFromMemory(void *pGraphMemFile) = 0;
virtual ErrorCode RefineMetadata(std::vector<int>& indices, const std::string& p_folderPath);
static ErrorCode MetaCopy(const std::string& p_src, const std::string& p_dst);
virtual ErrorCode RefineMetadata(std::vector<SizeType>& indices, const std::string& p_metaFile, const std::string& p_metaindexFile);
};
@ -46,19 +46,20 @@ public:
~FileMetadataSet();
ByteArray GetMetadata(IndexType p_vectorID) const;
ByteArray GetMetadata(SizeType p_vectorID) const;
SizeType Count() const;
bool Available() const;
std::pair<std::uint64_t, std::uint64_t> BufferSize() const;
void AddBatch(MetadataSet& data);
ErrorCode SaveMetadata(std::ostream& p_metaOut, std::ostream& p_metaIndexOut);
ErrorCode SaveMetadata(const std::string& p_metaFile, const std::string& p_metaindexFile);
ErrorCode SaveMetadataToMemory(void **pGraphMemFile, int64_t &len);
ErrorCode LoadMetadataFromMemory(void *pGraphMemFile);
private:
std::ifstream* m_fp = nullptr;
@ -77,25 +78,24 @@ private:
class MemMetadataSet : public MetadataSet
{
public:
MemMetadataSet() = default;
MemMetadataSet(ByteArray p_metadata, ByteArray p_offsets, SizeType p_count);
~MemMetadataSet();
ByteArray GetMetadata(IndexType p_vectorID) const;
ByteArray GetMetadata(SizeType p_vectorID) const;
SizeType Count() const;
bool Available() const;
std::pair<std::uint64_t, std::uint64_t> BufferSize() const;
void AddBatch(MetadataSet& data);
ErrorCode SaveMetadata(std::ostream& p_metaOut, std::ostream& p_metaIndexOut);
ErrorCode SaveMetadata(const std::string& p_metaFile, const std::string& p_metaindexFile);
ErrorCode SaveMetadataToMemory(void **pGraphMemFile, int64_t &len);
ErrorCode LoadMetadataFromMemory(void *pGraphMemFile);
private:
std::vector<std::uint64_t> m_offsets;

View File

@ -4,24 +4,13 @@
#ifndef _SPTAG_SEARCHQUERY_H_
#define _SPTAG_SEARCHQUERY_H_
#include "CommonDataStructure.h"
#include "SearchResult.h"
#include <cstring>
namespace SPTAG
{
struct BasicResult
{
int VID;
float Dist;
BasicResult() : VID(-1), Dist(MaxDist) {}
BasicResult(int p_vid, float p_dist) : VID(p_vid), Dist(p_dist) {}
};
// Space to save temporary answer, similar with TopKCache
class QueryResult
{
@ -38,39 +27,26 @@ public:
QueryResult(const void* p_target, int p_resultNum, bool p_withMeta)
: m_target(nullptr),
m_resultNum(0),
m_withMeta(false)
{
Init(p_target, p_resultNum, p_withMeta);
}
QueryResult(const void* p_target, int p_resultNum, std::vector<BasicResult>& p_results)
QueryResult(const void* p_target, int p_resultNum, bool p_withMeta, BasicResult* p_results)
: m_target(p_target),
m_resultNum(p_resultNum),
m_withMeta(false)
m_withMeta(p_withMeta)
{
p_results.resize(p_resultNum);
m_results.reset(p_results.data());
m_results.Set(p_results, p_resultNum, false);
}
QueryResult(const QueryResult& p_other)
: m_target(p_other.m_target),
m_resultNum(p_other.m_resultNum),
m_withMeta(p_other.m_withMeta)
{
Init(p_other.m_target, p_other.m_resultNum, p_other.m_withMeta);
if (m_resultNum > 0)
{
m_results.reset(new BasicResult[m_resultNum]);
std::memcpy(m_results.get(), p_other.m_results.get(), sizeof(BasicResult) * m_resultNum);
if (m_withMeta)
{
m_metadatas.reset(new ByteArray[m_resultNum]);
std::copy(p_other.m_metadatas.get(), p_other.m_metadatas.get() + m_resultNum, m_metadatas.get());
}
std::copy(p_other.m_results.Data(), p_other.m_results.Data() + m_resultNum, m_results.Data());
}
}
@ -78,14 +54,9 @@ public:
QueryResult& operator=(const QueryResult& p_other)
{
Init(p_other.m_target, p_other.m_resultNum, p_other.m_withMeta);
if (m_resultNum > 0)
{
std::memcpy(m_results.get(), p_other.m_results.get(), sizeof(BasicResult) * m_resultNum);
if (m_withMeta)
{
std::copy(p_other.m_metadatas.get(), p_other.m_metadatas.get() + m_resultNum, m_metadatas.get());
}
std::copy(p_other.m_results.Data(), p_other.m_results.Data() + m_resultNum, m_results.Data());
}
return *this;
@ -100,18 +71,10 @@ public:
inline void Init(const void* p_target, int p_resultNum, bool p_withMeta)
{
m_target = p_target;
if (p_resultNum > m_resultNum)
{
m_results.reset(new BasicResult[p_resultNum]);
}
if (p_withMeta && (!m_withMeta || p_resultNum > m_resultNum))
{
m_metadatas.reset(new ByteArray[p_resultNum]);
}
m_resultNum = p_resultNum;
m_withMeta = p_withMeta;
m_results = Array<BasicResult>::Alloc(p_resultNum);
}
@ -135,11 +98,11 @@ public:
inline BasicResult* GetResult(int i) const
{
return i < m_resultNum ? m_results.get() + i : nullptr;
return i < m_resultNum ? m_results.Data() + i : nullptr;
}
inline void SetResult(int p_index, int p_VID, float p_dist)
inline void SetResult(int p_index, SizeType p_VID, float p_dist)
{
if (p_index < m_resultNum)
{
@ -151,7 +114,7 @@ public:
inline BasicResult* GetResults() const
{
return m_results.get();
return m_results.Data();
}
@ -165,7 +128,7 @@ public:
{
if (p_index < m_resultNum && m_withMeta)
{
return m_metadatas[p_index];
return m_results[p_index].Meta;
}
return ByteArray::c_empty;
@ -176,7 +139,7 @@ public:
{
if (p_index < m_resultNum && m_withMeta)
{
m_metadatas[p_index] = std::move(p_metadata);
m_results[p_index].Meta = std::move(p_metadata);
}
}
@ -187,39 +150,32 @@ public:
{
m_results[i].VID = -1;
m_results[i].Dist = MaxDist;
}
if (m_withMeta)
{
for (int i = 0; i < m_resultNum; i++)
{
m_metadatas[i].Clear();
}
m_results[i].Meta.Clear();
}
}
iterator begin()
{
return m_results.get();
return m_results.Data();
}
iterator end()
{
return m_results.get() + m_resultNum;
return m_results.Data() + m_resultNum;
}
const_iterator begin() const
{
return m_results.get();
return m_results.Data();
}
const_iterator end() const
{
return m_results.get() + m_resultNum;
return m_results.Data() + m_resultNum;
}
@ -230,9 +186,7 @@ protected:
bool m_withMeta;
std::unique_ptr<BasicResult[]> m_results;
std::unique_ptr<ByteArray[]> m_metadatas;
Array<BasicResult> m_results;
};
} // namespace SPTAG

View File

@ -0,0 +1,26 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
#ifndef _SPTAG_SEARCHRESULT_H_
#define _SPTAG_SEARCHRESULT_H_
#include "CommonDataStructure.h"
namespace SPTAG
{
// One search hit: the ID of the matched vector, its distance to the query and
// an optional metadata blob.
struct BasicResult
{
    SizeType VID;    // vector ID; the default constructor sets it to -1
    float Dist;      // distance value; the default constructor sets it to MaxDist
    ByteArray Meta;  // metadata; stays default-constructed unless supplied

    // Creates a placeholder result (VID = -1, Dist = MaxDist, default Meta).
    BasicResult() : VID(-1), Dist(MaxDist) {}

    // Creates a result without metadata.
    BasicResult(SizeType p_vid, float p_dist) : VID(p_vid), Dist(p_dist) {}

    // Creates a result with metadata. p_meta is already a by-value copy, so it is
    // moved into the member rather than copied a second time.
    BasicResult(SizeType p_vid, float p_dist, ByteArray p_meta) : VID(p_vid), Dist(p_dist), Meta(std::move(p_meta)) {}
};
} // namespace SPTAG
#endif // _SPTAG_SEARCHRESULT_H_

View File

@ -10,6 +10,8 @@
#include "MetadataSet.h"
#include "inc/Helper/SimpleIniReader.h"
#include <unordered_map>
namespace SPTAG
{
@ -20,59 +22,58 @@ public:
virtual ~VectorIndex();
virtual ErrorCode SaveIndex(const std::string& p_folderPath, std::ofstream& p_configout) = 0;
virtual ErrorCode BuildIndex(const void* p_data, SizeType p_vectorNum, DimensionType p_dimension) = 0;
virtual ErrorCode LoadIndex(const std::string& p_folderPath, Helper::IniReader& p_reader) = 0;
virtual ErrorCode AddIndex(const void* p_vectors, SizeType p_vectorNum, DimensionType p_dimension, SizeType* p_start = nullptr) = 0;
virtual ErrorCode SaveIndexToMemory(std::vector<void*>& p_indexBlobs, std::vector<int64_t>& p_indexBlobsLen) = 0;
virtual ErrorCode LoadIndexFromMemory(const std::vector<void*>& p_indexBlobs) = 0;
virtual ErrorCode BuildIndex(const void* p_data, int p_vectorNum, int p_dimension) = 0;
virtual ErrorCode DeleteIndex(const void* p_vectors, SizeType p_vectorNum) = 0;
virtual ErrorCode SearchIndex(QueryResult& p_results) const = 0;
virtual ErrorCode AddIndex(const void* p_vectors, int p_vectorNum, int p_dimension) = 0;
virtual ErrorCode DeleteIndex(const void* p_vectors, int p_vectorNum) = 0;
//virtual ErrorCode AddIndexWithID(const void* p_vector, const int& p_id) = 0;
//virtual ErrorCode DeleteIndexWithID(const void* p_vector, const int& p_id) = 0;
virtual float ComputeDistance(const void* pX, const void* pY) const = 0;
virtual const void* GetSample(const int idx) const = 0;
virtual int GetFeatureDim() const = 0;
virtual int GetNumSamples() const = 0;
virtual const void* GetSample(const SizeType idx) const = 0;
virtual bool ContainSample(const SizeType idx) const = 0;
virtual bool NeedRefine() const = 0;
virtual DimensionType GetFeatureDim() const = 0;
virtual SizeType GetNumSamples() const = 0;
virtual DistCalcMethod GetDistCalcMethod() const = 0;
virtual IndexAlgoType GetIndexAlgoType() const = 0;
virtual VectorValueType GetVectorValueType() const = 0;
virtual int GetNumThreads() const = 0;
virtual std::string GetParameter(const char* p_param) const = 0;
virtual ErrorCode SetParameter(const char* p_param, const char* p_value) = 0;
virtual std::shared_ptr<std::vector<std::uint64_t>> CalculateBufferSize() const;
virtual ErrorCode LoadIndex(const std::string& p_config, const std::vector<ByteArray>& p_indexBlobs);
virtual ErrorCode LoadIndex(const std::string& p_folderPath);
virtual ErrorCode SaveIndex(std::string& p_config, const std::vector<ByteArray>& p_indexBlobs);
virtual ErrorCode SaveIndex(const std::string& p_folderPath);
virtual ErrorCode BuildIndex(std::shared_ptr<VectorSet> p_vectorSet, std::shared_ptr<MetadataSet> p_metadataSet);
virtual ErrorCode SearchIndex(const void* p_vector, int p_neighborCount, std::vector<BasicResult>& p_results) const;
virtual ErrorCode BuildIndex(std::shared_ptr<VectorSet> p_vectorSet, std::shared_ptr<MetadataSet> p_metadataSet, bool p_withMetaIndex = false);
virtual ErrorCode AddIndex(std::shared_ptr<VectorSet> p_vectorSet, std::shared_ptr<MetadataSet> p_metadataSet);
virtual ErrorCode DeleteIndex(ByteArray p_meta);
virtual const void* GetSample(ByteArray p_meta);
virtual ErrorCode SearchIndex(const void* p_vector, int p_neighborCount, bool p_withMeta, BasicResult* p_results) const;
virtual std::string GetParameter(const std::string& p_param) const;
virtual ErrorCode SetParameter(const std::string& p_param, const std::string& p_value);
virtual ByteArray GetMetadata(IndexType p_vectorID) const;
virtual ByteArray GetMetadata(SizeType p_vectorID) const;
virtual void SetMetadata(const std::string& p_metadataFilePath, const std::string& p_metadataIndexPath);
virtual std::string GetIndexName() const
{
if (m_sIndexName == "")
return Helper::Convert::ConvertToString(GetIndexAlgoType());
if (m_sIndexName == "") return Helper::Convert::ConvertToString(GetIndexAlgoType());
return m_sIndexName;
}
virtual void SetIndexName(std::string p_name) { m_sIndexName = p_name; }
@ -83,9 +84,42 @@ public:
static ErrorCode LoadIndex(const std::string& p_loaderFilePath, std::shared_ptr<VectorIndex>& p_vectorIndex);
static ErrorCode LoadIndex(const std::string& p_config, const std::vector<ByteArray>& p_indexBlobs, std::shared_ptr<VectorIndex>& p_vectorIndex);
protected:
virtual std::shared_ptr<std::vector<std::uint64_t>> BufferSize() const = 0;
virtual ErrorCode SaveConfig(std::ostream& p_configout) const = 0;
virtual ErrorCode SaveIndexData(const std::string& p_folderPath) = 0;
virtual ErrorCode SaveIndexData(const std::vector<std::ostream*>& p_indexStreams) = 0;
virtual ErrorCode LoadConfig(Helper::IniReader& p_reader) = 0;
virtual ErrorCode LoadIndexData(const std::string& p_folderPath) = 0;
virtual ErrorCode LoadIndexDataFromMemory(const std::vector<ByteArray>& p_indexBlobs) = 0;
virtual ErrorCode DeleteIndex(const SizeType& p_id) = 0;
virtual ErrorCode RefineIndex(const std::string& p_folderPath) = 0;
virtual ErrorCode RefineIndex(const std::vector<std::ostream*>& p_indexStreams) = 0;
private:
void BuildMetaMapping();
ErrorCode LoadIndexConfig(Helper::IniReader& p_reader);
ErrorCode SaveIndexConfig(std::ostream& p_configOut);
protected:
std::string m_sIndexName;
std::string m_sMetadataFile = "metadata.bin";
std::string m_sMetadataIndexFile = "metadataIndex.bin";
std::shared_ptr<MetadataSet> m_pMetadata;
std::unique_ptr<std::unordered_map<std::string, SizeType>> m_pMetaToVec;
};

View File

@ -18,11 +18,11 @@ public:
virtual VectorValueType GetValueType() const = 0;
virtual void* GetVector(IndexType p_vectorID) const = 0;
virtual void* GetVector(SizeType p_vectorID) const = 0;
virtual void* GetData() const = 0;
virtual SizeType Dimension() const = 0;
virtual DimensionType Dimension() const = 0;
virtual SizeType Count() const = 0;
@ -37,18 +37,18 @@ class BasicVectorSet : public VectorSet
public:
BasicVectorSet(const ByteArray& p_bytesArray,
VectorValueType p_valueType,
SizeType p_dimension,
DimensionType p_dimension,
SizeType p_vectorCount);
virtual ~BasicVectorSet();
virtual VectorValueType GetValueType() const;
virtual void* GetVector(IndexType p_vectorID) const;
virtual void* GetVector(SizeType p_vectorID) const;
virtual void* GetData() const;
virtual SizeType Dimension() const;
virtual DimensionType Dimension() const;
virtual SizeType Count() const;
@ -61,7 +61,7 @@ private:
VectorValueType m_valueType;
SizeType m_dimension;
DimensionType m_dimension;
SizeType m_vectorCount;

View File

@ -0,0 +1,39 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
#ifndef _SPTAG_HELPER_BUFFERSTREAM_H_
#define _SPTAG_HELPER_BUFFERSTREAM_H_
#include <streambuf>
#include <ostream>
#include <memory>
namespace SPTAG
{
namespace Helper
{
// Output stream buffer backed by a caller-supplied character array.
// The put area is set to [buffer, buffer + size). No overflow handler is
// provided, so a write past the end of the array puts the owning stream
// into a failed state instead of growing the buffer.
struct streambuf : public std::basic_streambuf<char>
{
    streambuf(char* buffer, size_t size)
    {
        char* const putAreaEnd = buffer + size;
        this->setp(buffer, putAreaEnd);
    }
};
// std::ostream that writes through a Helper::streambuf.
// When transferOwnership is true the stream takes over the buffer object
// and deletes it when the stream is destroyed; otherwise the caller keeps
// ownership and must keep the buffer alive for the stream's lifetime.
class obufferstream : public std::ostream
{
public:
    obufferstream(streambuf* buf, bool transferOwnership) : std::ostream(buf)
    {
        if (!transferOwnership)
            return;
        m_bufHolder = std::shared_ptr<streambuf>(buf);
    }

private:
    // Non-empty only when the stream owns its buffer.
    std::shared_ptr<streambuf> m_bufHolder;
};
} // namespace Helper
} // namespace SPTAG
#endif // _SPTAG_HELPER_BUFFERSTREAM_H_

View File

@ -0,0 +1,148 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
#ifndef _SPTAG_HELPER_CONCURRENTSET_H_
#define _SPTAG_HELPER_CONCURRENTSET_H_
#include <cstdint>
#include <cstring>
#include <fstream>
#include <iostream>
#include <memory>
#include <mutex>
#include <shared_mutex>
#include <string>
#include <unordered_set>
namespace SPTAG
{
namespace Helper
{
namespace Concurrent
{
// Unordered set guarded by a reader/writer mutex.
//
// Locking contract:
//   - size(), contains() and insert() take the internal lock themselves.
//   - save(), load() and bufferSize() do NOT lock; callers that need a
//     consistent snapshot must hold getLock() around them.
//
// Serialized layout: a SizeType element count followed by `count` raw
// copies of T, so T must be trivially copyable.
template <typename T>
class ConcurrentSet
{
public:
    ConcurrentSet();
    ~ConcurrentSet();

    size_t size() const;
    bool contains(const T& key) const;
    void insert(const T& key);
    std::shared_timed_mutex& getLock();

    // Serialization helpers; each returns false on an I/O or format error.
    bool save(std::ostream& output);
    bool save(std::string filename);
    bool load(std::string filename);
    bool load(char* pmemoryFile);

    // Number of bytes save() writes for the current content.
    std::uint64_t bufferSize() const;

private:
    std::unique_ptr<std::shared_timed_mutex> m_lock;
    std::unordered_set<T> m_data;
};

template<typename T>
ConcurrentSet<T>::ConcurrentSet()
{
    m_lock.reset(new std::shared_timed_mutex);
}

template<typename T>
ConcurrentSet<T>::~ConcurrentSet()
{
}

// Returns the element count under a shared (reader) lock.
template<typename T>
size_t ConcurrentSet<T>::size() const
{
    std::shared_lock<std::shared_timed_mutex> lock(*m_lock);
    return m_data.size();
}

// Returns true when key is present; takes a shared (reader) lock.
template<typename T>
bool ConcurrentSet<T>::contains(const T& key) const
{
    std::shared_lock<std::shared_timed_mutex> lock(*m_lock);
    return (m_data.find(key) != m_data.end());
}

// Adds key to the set under an exclusive (writer) lock.
template<typename T>
void ConcurrentSet<T>::insert(const T& key)
{
    std::unique_lock<std::shared_timed_mutex> lock(*m_lock);
    m_data.insert(key);
}

// Exposes the internal mutex so callers can bracket save()/load().
template<typename T>
std::shared_timed_mutex& ConcurrentSet<T>::getLock()
{
    return *m_lock;
}

template<typename T>
std::uint64_t ConcurrentSet<T>::bufferSize() const
{
    return sizeof(SizeType) + sizeof(T) * m_data.size();
}

// Writes the count followed by every element to output.
// Returns false when the stream reports a failure, so a truncated write
// (full disk, exhausted fixed-size buffer) is no longer reported as success.
template<typename T>
bool ConcurrentSet<T>::save(std::ostream& output)
{
    SizeType count = (SizeType)m_data.size();
    output.write((char*)&count, sizeof(SizeType));
    for (auto iter = m_data.begin(); iter != m_data.end(); iter++)
        output.write((char*)&(*iter), sizeof(T));
    if (!output) return false;
    std::cout << "Save DeleteID (" << count << ") Finish!" << std::endl;
    return true;
}

// Writes the set to a binary file. Returns false when the file cannot be
// opened, when writing fails, or when closing (the final flush) fails.
template<typename T>
bool ConcurrentSet<T>::save(std::string filename)
{
    std::cout << "Save DeleteID To " << filename << std::endl;
    std::ofstream output(filename, std::ios::binary);
    if (!output.is_open()) return false;
    const bool written = save(output);
    output.close();
    return written && !output.fail();
}

// Reads a file produced by save() and adds its elements to the set.
// Returns false when the file cannot be opened, is truncated, or carries a
// negative count; in those cases the set is left unchanged.
template<typename T>
bool ConcurrentSet<T>::load(std::string filename)
{
    std::cout << "Load DeleteID From " << filename << std::endl;
    std::ifstream input(filename, std::ios::binary);
    if (!input.is_open()) return false;

    // Initialized so a failed read can never drive the loop with garbage.
    SizeType count = 0;
    if (!input.read((char*)&count, sizeof(SizeType)) || count < 0) return false;

    // Stage into a local set so a short file does not leave partial data.
    std::unordered_set<T> loaded;
    T ID;
    for (SizeType i = 0; i < count; i++)
    {
        if (!input.read((char*)&ID, sizeof(T))) return false;
        loaded.insert(ID);
    }
    input.close();

    m_data.insert(loaded.begin(), loaded.end());
    std::cout << "Load DeleteID (" << count << ") Finish!" << std::endl;
    return true;
}

// Reads the same layout from a memory image and adds the elements.
// The image is copied out with memcpy because blob pointers are not
// guaranteed to be aligned for SizeType or T. Returns false for a null
// pointer or a negative count.
// NOTE(review): the image length is not passed in, so the count cannot be
// bounds-checked here; callers must supply a complete image.
template<typename T>
bool ConcurrentSet<T>::load(char* pmemoryFile)
{
    if (pmemoryFile == nullptr) return false;

    SizeType count = 0;
    std::memcpy(&count, pmemoryFile, sizeof(SizeType));
    if (count < 0) return false;
    pmemoryFile += sizeof(SizeType);

    T ID;
    for (SizeType i = 0; i < count; i++)
    {
        std::memcpy(&ID, pmemoryFile, sizeof(T));
        m_data.insert(ID);
        pmemoryFile += sizeof(T);
    }
    std::cout << "Load DeleteID (" << count << ") Finish!" << std::endl;
    return true;
}
}
}
}
#endif // _SPTAG_HELPER_CONCURRENTSET_H_

View File

@ -31,6 +31,8 @@ public:
ErrorCode LoadIniFile(const std::string& p_iniFilePath);
ErrorCode LoadIni(std::istream& p_input);
bool DoesSectionExist(const std::string& p_section) const;
bool DoesParameterExist(const std::string& p_section, const std::string& p_param) const;

View File

@ -0,0 +1,59 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
#ifndef _SPTAG_HELPER_VECTORSETREADER_H_
#define _SPTAG_HELPER_VECTORSETREADER_H_
#include "inc/Core/Common.h"
#include "inc/Core/VectorSet.h"
#include "inc/Core/MetadataSet.h"
#include "inc/Helper/ArgumentsParser.h"
#include <memory>
namespace SPTAG
{
namespace Helper
{
// Options shared by vector-set readers. Derives from ArgumentsParser
// (declared in inc/Helper/ArgumentsParser.h).
class ReaderOptions : public ArgumentsParser
{
public:
// The delimiter defaults to "|" and the thread count to 32 when omitted.
ReaderOptions(VectorValueType p_valueType, DimensionType p_dimension, std::string p_vectorDelimiter = "|", std::uint32_t p_threadNum = 32);
~ReaderOptions();
// Presumably the number of worker threads used while reading -- confirm in the reader implementation.
std::uint32_t m_threadNum;
// Presumably the dimension of each input vector -- TODO confirm.
DimensionType m_dimension;
// Presumably the separator between vector elements in text input -- TODO confirm.
std::string m_vectorDelimiter;
// Presumably the element value type of the input vectors -- TODO confirm.
SPTAG::VectorValueType m_inputValueType;
};
// Abstract interface for loading a vector set, and its metadata set, from a file.
// Concrete readers implement the pure virtual members; instances are
// obtained through CreateInstance().
class VectorSetReader
{
public:
VectorSetReader(std::shared_ptr<ReaderOptions> p_options);
virtual ~VectorSetReader();
// Loads the file at p_filePath; the ErrorCode reports the outcome.
virtual ErrorCode LoadFile(const std::string& p_filePath) = 0;
// Accessors for the loaded data (presumably valid only after LoadFile succeeds -- confirm in implementations).
virtual std::shared_ptr<VectorSet> GetVectorSet() const = 0;
virtual std::shared_ptr<MetadataSet> GetMetadataSet() const = 0;
// Factory returning a reader configured with p_options.
static std::shared_ptr<VectorSetReader> CreateInstance(std::shared_ptr<ReaderOptions> p_options);
protected:
// Options shared with the caller that created the reader.
std::shared_ptr<ReaderOptions> m_options;
};
} // namespace Helper
} // namespace SPTAG
#endif // _SPTAG_HELPER_VECTORSETREADER_H_

View File

@ -1,8 +1,8 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
#ifndef _SPTAG_INDEXBUILDER_VECTORSETREADERS_DEFAULTREADER_H_
#define _SPTAG_INDEXBUILDER_VECTORSETREADERS_DEFAULTREADER_H_
#ifndef _SPTAG_HELPER_VECTORSETREADERS_DEFAULTREADER_H_
#define _SPTAG_HELPER_VECTORSETREADERS_DEFAULTREADER_H_
#include "../VectorSetReader.h"
#include "inc/Helper/Concurrent.h"
@ -13,13 +13,13 @@
namespace SPTAG
{
namespace IndexBuilder
namespace Helper
{
class DefaultReader : public VectorSetReader
{
public:
DefaultReader(std::shared_ptr<BuilderOptions> p_options);
DefaultReader(std::shared_ptr<ReaderOptions> p_options);
virtual ~DefaultReader();
@ -44,7 +44,7 @@ private:
template<typename DataType>
bool TranslateVector(char* p_str, DataType* p_vector)
{
std::uint32_t eleCount = 0;
DimensionType eleCount = 0;
char* next = p_str;
while ((*next) != '\0')
{
@ -85,11 +85,11 @@ private:
std::size_t m_subTaskBlocksize;
std::atomic<std::uint32_t> m_totalRecordCount;
std::atomic<SizeType> m_totalRecordCount;
std::atomic<std::size_t> m_totalRecordVectorBytes;
std::vector<std::uint32_t> m_subTaskRecordCount;
std::vector<SizeType> m_subTaskRecordCount;
std::string m_vectorOutput;
@ -102,7 +102,7 @@ private:
} // namespace IndexBuilder
} // namespace Helper
} // namespace SPTAG
#endif // _SPTAG_INDEXBUILDER_VECTORSETREADERS_DEFAULT_H_
#endif // _SPTAG_HELPER_VECTORSETREADERS_DEFAULT_H_

View File

@ -5,7 +5,7 @@
#define _SPTAG_INDEXBUILDER_OPTIONS_H_
#include "inc/Core/Common.h"
#include "inc/Helper/ArgumentsParser.h"
#include "inc/Helper/VectorSetReader.h"
#include <string>
#include <vector>
@ -16,21 +16,13 @@ namespace SPTAG
namespace IndexBuilder
{
class BuilderOptions : public Helper::ArgumentsParser
class BuilderOptions : public Helper::ReaderOptions
{
public:
BuilderOptions();
~BuilderOptions();
std::uint32_t m_threadNum;
std::uint32_t m_dimension;
std::string m_vectorDelimiter;
SPTAG::VectorValueType m_inputValueType;
std::string m_inputFiles;
std::string m_outputFolder;

View File

@ -1,43 +0,0 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
#ifndef _SPTAG_INDEXBUILDER_VECTORSETREADER_H_
#define _SPTAG_INDEXBUILDER_VECTORSETREADER_H_
#include "inc/Core/Common.h"
#include "inc/Core/VectorSet.h"
#include "inc/Core/MetadataSet.h"
#include "Options.h"
#include <memory>
namespace SPTAG
{
namespace IndexBuilder
{
// Abstract interface for loading a vector set, and its metadata set, from a file,
// configured through IndexBuilder::BuilderOptions.
class VectorSetReader
{
public:
VectorSetReader(std::shared_ptr<BuilderOptions> p_options);
virtual ~VectorSetReader();
// Loads the file at p_filePath; the ErrorCode reports the outcome.
virtual ErrorCode LoadFile(const std::string& p_filePath) = 0;
// Accessors for the loaded data (presumably valid only after LoadFile succeeds -- confirm in implementations).
virtual std::shared_ptr<VectorSet> GetVectorSet() const = 0;
virtual std::shared_ptr<MetadataSet> GetMetadataSet() const = 0;
// Factory returning a reader configured with p_options.
static std::shared_ptr<VectorSetReader> CreateInstance(std::shared_ptr<BuilderOptions> p_options);
protected:
// Options shared with the caller that created the reader.
std::shared_ptr<BuilderOptions> m_options;
};
} // namespace IndexBuilder
} // namespace SPTAG
#endif // _SPTAG_INDEXBUILDER_VECTORSETREADER_H_

View File

@ -7,6 +7,4 @@
<package id="boost_system-vc140" version="1.67.0.0" targetFramework="native" />
<package id="boost_thread-vc140" version="1.67.0.0" targetFramework="native" />
<package id="boost_wserialization-vc140" version="1.67.0.0" targetFramework="native" />
<package id="tbb_oss" version="9.107.0.0" targetFramework="native" />
<package id="tbb_oss.redist" version="9.107.0.0" targetFramework="native" />
</packages>

View File

@ -53,19 +53,19 @@ int main(int argc, char** argv)
for (const auto& indexRes : result.m_allIndexResults)
{
fprintf(stdout, "Index: %s\n", indexRes.m_indexName.c_str());
std::cout << "Index: " << indexRes.m_indexName << std::endl;
int idx = 0;
for (const auto& res : indexRes.m_results)
{
fprintf(stdout, "------------------\n");
fprintf(stdout, "DocIndex: %d Distance: %f\n", res.VID, res.Dist);
std::cout << "------------------" << std::endl;
std::cout << "DocIndex: " << res.VID << " Distance: " << res.Dist;
if (indexRes.m_results.WithMeta())
{
const auto& metadata = indexRes.m_results.GetMetadata(idx);
fprintf(stdout, " MetaData: %.*s\n", static_cast<int>(metadata.Length()), metadata.Data());
std::cout << " MetaData: " << std::string((char*)metadata.Data(), metadata.Length());
}
std::cout << std::endl;
++idx;
}
}

View File

@ -13,22 +13,7 @@ namespace SPTAG
namespace BKT
{
template <typename T>
ErrorCode Index<T>::LoadIndexFromMemory(const std::vector<void*>& p_indexBlobs)
{
if (!m_pSamples.Load((char*)p_indexBlobs[0])) return ErrorCode::FailedParseValue;
if (!m_pTrees.LoadTrees((char*)p_indexBlobs[1])) return ErrorCode::FailedParseValue;
if (!m_pGraph.LoadGraphFromMemory((char*)p_indexBlobs[2])) return ErrorCode::FailedParseValue;
m_pMetadata = std::make_shared<MemMetadataSet>();
if (ErrorCode::Success != m_pMetadata->LoadMetadataFromMemory((char*)p_indexBlobs[3]))
return ErrorCode::FailedParseValue;
m_workSpacePool.reset(new COMMON::WorkSpacePool(m_iMaxCheck, GetNumSamples()));
m_workSpacePool->Init(m_iNumberOfThreads);
return ErrorCode::Success;
}
template <typename T>
ErrorCode Index<T>::LoadIndex(const std::string& p_folderPath, Helper::IniReader& p_reader)
ErrorCode Index<T>::LoadConfig(Helper::IniReader& p_reader)
{
#define DefineBKTParameter(VarName, VarType, DefaultValue, RepresentStr) \
SetParameter(RepresentStr, \
@ -38,34 +23,96 @@ namespace SPTAG
#include "inc/Core/BKT/ParameterDefinitionList.h"
#undef DefineBKTParameter
return ErrorCode::Success;
}
if (!m_pSamples.Load(p_folderPath + m_sDataPointsFilename)) return ErrorCode::Fail;
if (!m_pTrees.LoadTrees(p_folderPath + m_sBKTFilename)) return ErrorCode::Fail;
if (!m_pGraph.LoadGraph(p_folderPath + m_sGraphFilename)) return ErrorCode::Fail;
// Restores the index from in-memory blobs.
// Blob order: [0] samples, [1] trees, [2] graph, [3] (optional) deleted IDs.
// Returns LackOfInputs when fewer than three blobs are supplied and
// FailedParseValue when any blob fails to load; on success the search
// workspace pool is rebuilt for the loaded sample count.
template <typename T>
ErrorCode Index<T>::LoadIndexDataFromMemory(const std::vector<ByteArray>& p_indexBlobs)
{
    const auto blobCount = p_indexBlobs.size();
    if (blobCount < 3) return ErrorCode::LackOfInputs;

    // Short-circuit evaluation keeps the original load order and stops at
    // the first failure.
    const bool parsed =
        m_pSamples.Load((char*)p_indexBlobs[0].Data()) &&
        m_pTrees.LoadTrees((char*)p_indexBlobs[1].Data()) &&
        m_pGraph.LoadGraph((char*)p_indexBlobs[2].Data()) &&
        (blobCount <= 3 || m_deletedID.load((char*)p_indexBlobs[3].Data()));
    if (!parsed) return ErrorCode::FailedParseValue;

    m_workSpacePool.reset(new COMMON::WorkSpacePool(m_iMaxCheck, GetNumSamples()));
    m_workSpacePool->Init(m_iNumberOfThreads);
    return ErrorCode::Success;
}
// Loads samples, trees, graph and the deleted-ID set from files inside
// p_folderPath (file names come from the index's filename members), then
// rebuilds the search workspace pool. Returns Fail as soon as any of the
// four files cannot be loaded.
template <typename T>
ErrorCode Index<T>::LoadIndexData(const std::string& p_folderPath)
{
    // Short-circuit evaluation keeps the original load order.
    const bool loaded =
        m_pSamples.Load(p_folderPath + m_sDataPointsFilename) &&
        m_pTrees.LoadTrees(p_folderPath + m_sBKTFilename) &&
        m_pGraph.LoadGraph(p_folderPath + m_sGraphFilename) &&
        m_deletedID.load(p_folderPath + m_sDeleteDataPointsFilename);
    if (!loaded) return ErrorCode::Fail;

    m_workSpacePool.reset(new COMMON::WorkSpacePool(m_iMaxCheck, GetNumSamples()));
    m_workSpacePool->Init(m_iNumberOfThreads);
    return ErrorCode::Success;
}
// Writes every BKT parameter to p_configOut as "name=value", one per line,
// followed by a blank line. Always returns ErrorCode::Success.
template <typename T>
ErrorCode Index<T>::SaveConfig(std::ostream& p_configOut) const
{
// X-macro: the parameter list header invokes DefineBKTParameter once per
// parameter, so the include below expands to one output statement each.
#define DefineBKTParameter(VarName, VarType, DefaultValue, RepresentStr) \
p_configOut << RepresentStr << "=" << GetParameter(RepresentStr) << std::endl;
#include "inc/Core/BKT/ParameterDefinitionList.h"
#undef DefineBKTParameter
// Blank line terminates the parameter section.
p_configOut << std::endl;
return ErrorCode::Success;
}
// Saves samples, trees, graph and the deleted-ID set to files inside
// p_folderPath. Holds the add lock and a shared lock on the deleted-ID set
// for the duration so the four files are written from one state.
// Returns Fail as soon as any component fails to save.
template<typename T>
ErrorCode Index<T>::SaveIndexData(const std::string& p_folderPath)
{
    std::lock_guard<std::mutex> lock(m_dataAddLock);
    std::shared_lock<std::shared_timed_mutex> sharedlock(m_deletedID.getLock());

    // Short-circuit evaluation keeps the original save order.
    const bool saved =
        m_pSamples.Save(p_folderPath + m_sDataPointsFilename) &&
        m_pTrees.SaveTrees(p_folderPath + m_sBKTFilename) &&
        m_pGraph.SaveGraph(p_folderPath + m_sGraphFilename) &&
        m_deletedID.save(p_folderPath + m_sDeleteDataPointsFilename);
    return saved ? ErrorCode::Success : ErrorCode::Fail;
}
// Saves the index to caller-supplied streams.
// Stream order: [0] samples, [1] trees, [2] graph, [3] deleted IDs.
// Returns LackOfInputs when fewer than four streams are given and Fail as
// soon as any component fails to save.
template<typename T>
ErrorCode Index<T>::SaveIndexData(const std::vector<std::ostream*>& p_indexStreams)
{
    if (p_indexStreams.size() < 4) return ErrorCode::LackOfInputs;

    std::lock_guard<std::mutex> lock(m_dataAddLock);
    std::shared_lock<std::shared_timed_mutex> sharedlock(m_deletedID.getLock());

    // Short-circuit evaluation keeps the original save order.
    const bool saved =
        m_pSamples.Save(*p_indexStreams[0]) &&
        m_pTrees.SaveTrees(*p_indexStreams[1]) &&
        m_pGraph.SaveGraph(*p_indexStreams[2]) &&
        m_deletedID.save(*p_indexStreams[3]);
    return saved ? ErrorCode::Success : ErrorCode::Fail;
}
#pragma region K-NN search
#define Search(CheckDeleted1) \
m_pTrees.InitSearchTrees(this, p_query, p_space); \
const int checkPos = m_pGraph.m_iNeighborhoodSize - 1; \
const DimensionType checkPos = m_pGraph.m_iNeighborhoodSize - 1; \
while (!p_space.m_SPTQueue.empty()) { \
m_pTrees.SearchTrees(this, p_query, p_space, m_iNumberOfOtherDynamicPivots + p_space.m_iNumberOfCheckedLeaves); \
while (!p_space.m_NGQueue.empty()) { \
COMMON::HeapCell gnode = p_space.m_NGQueue.pop(); \
const int *node = m_pGraph[gnode.node]; \
const SizeType *node = m_pGraph[gnode.node]; \
_mm_prefetch((const char *)node, _MM_HINT_T0); \
CheckDeleted1 { \
if (p_query.AddPoint(gnode.node, gnode.distance)) { \
p_space.m_iNumOfContinuousNoBetterPropagation = 0; \
int checkNode = node[checkPos]; \
SizeType checkNode = node[checkPos]; \
if (checkNode < -1) { \
const COMMON::BKTNode& tnode = m_pTrees[-2 - checkNode]; \
for (int i = -tnode.childStart; i < tnode.childEnd; i++) { \
for (SizeType i = -tnode.childStart; i < tnode.childEnd; i++) { \
if (!p_query.AddPoint(m_pTrees[i].centerid, gnode.distance)) break; \
} \
} \
@ -77,11 +124,11 @@ namespace SPTAG
} \
} \
} \
for (int i = 0; i <= checkPos; i++) { \
for (DimensionType i = 0; i <= checkPos; i++) { \
_mm_prefetch((const char *)(m_pSamples)[node[i]], _MM_HINT_T0); \
} \
for (int i = 0; i <= checkPos; i++) { \
int nn_index = node[i]; \
for (DimensionType i = 0; i <= checkPos; i++) { \
SizeType nn_index = node[i]; \
if (nn_index < 0) break; \
if (p_space.CheckAndSet(nn_index)) continue; \
float distance2leaf = m_fComputeDistance(p_query.GetTarget(), (m_pSamples)[nn_index], GetFeatureDim()); \
@ -96,9 +143,9 @@ namespace SPTAG
p_query.SortResult(); \
template <typename T>
void Index<T>::SearchIndexWithDeleted(COMMON::QueryResultSet<T> &p_query, COMMON::WorkSpace &p_space, const tbb::concurrent_unordered_set<int> &p_deleted) const
void Index<T>::SearchIndexWithDeleted(COMMON::QueryResultSet<T> &p_query, COMMON::WorkSpace &p_space, const Helper::Concurrent::ConcurrentSet<SizeType> &p_deleted) const
{
Search(if (p_deleted.find(gnode.node) == p_deleted.end()))
Search(if (!p_deleted.contains(gnode.node)))
}
template <typename T>
@ -125,7 +172,7 @@ namespace SPTAG
{
for (int i = 0; i < p_query.GetResultNum(); ++i)
{
int result = p_query.GetResult(i)->VID;
SizeType result = p_query.GetResult(i)->VID;
p_query.SetMetadata(i, (result < 0) ? ByteArray::c_empty : m_pMetadata->GetMetadata(result));
}
}
@ -134,7 +181,7 @@ namespace SPTAG
#pragma endregion
template <typename T>
ErrorCode Index<T>::BuildIndex(const void* p_data, int p_vectorNum, int p_dimension)
ErrorCode Index<T>::BuildIndex(const void* p_data, SizeType p_vectorNum, DimensionType p_dimension)
{
omp_set_num_threads(m_iNumberOfThreads);
@ -144,20 +191,64 @@ namespace SPTAG
{
int base = COMMON::Utils::GetBase<T>();
#pragma omp parallel for
for (int i = 0; i < GetNumSamples(); i++) {
for (SizeType i = 0; i < GetNumSamples(); i++) {
COMMON::Utils::Normalize(m_pSamples[i], GetFeatureDim(), base);
}
}
m_workSpacePool.reset(new COMMON::WorkSpacePool(m_iMaxCheck, GetNumSamples()));
m_workSpacePool->Init(m_iNumberOfThreads);
m_pTrees.BuildTrees<T>(this);
m_pGraph.BuildGraph<T>(this, &(m_pTrees.GetSampleMap()));
return ErrorCode::Success;
}
// Writes a compacted copy of the index, with deleted vectors removed, to
// the supplied streams.
// Stream order: [0] samples, [1] trees, [2] graph, [3] deleted IDs, and,
// when the index has metadata, [4] metadata and [5] metadata index.
// Returns LackOfInputs when fewer than four streams are supplied, and Fail
// when a component cannot be written or the metadata streams are missing.
template <typename T>
ErrorCode Index<T>::RefineIndex(const std::vector<std::ostream*>& p_indexStreams)
{
    // Streams 0..3 are dereferenced unconditionally below.
    if (p_indexStreams.size() < 4) return ErrorCode::LackOfInputs;

    std::lock_guard<std::mutex> lock(m_dataAddLock);
    // NOTE(review): contains() takes its own shared lock on this same mutex
    // while sharedlock is held by this thread; re-locking a
    // std::shared_timed_mutex already owned by the calling thread is
    // undefined behavior per the standard -- confirm and restructure.
    std::shared_lock<std::shared_timed_mutex> sharedlock(m_deletedID.getLock());

    // Compaction: each deleted slot i is filled with the last surviving
    // vector. indices[k] is the old ID stored at new position k;
    // reverseIndices maps an old ID to its new position.
    SizeType newR = GetNumSamples();
    std::vector<SizeType> indices;
    std::vector<SizeType> reverseIndices(newR);
    for (SizeType i = 0; i < newR; i++) {
        if (!m_deletedID.contains(i)) {
            indices.push_back(i);
            reverseIndices[i] = i;
        }
        else {
            // Drop deleted entries at the tail before picking a replacement.
            while (m_deletedID.contains(newR - 1) && newR > i) newR--;
            if (newR == i) break;
            indices.push_back(newR - 1);
            reverseIndices[newR - 1] = i;
            newR--;
        }
    }

    std::cout << "Refine... from " << GetNumSamples() << "->" << newR << std::endl;

    if (false == m_pSamples.Refine(indices, *p_indexStreams[0])) return ErrorCode::Fail;
    if (nullptr != m_pMetadata && (p_indexStreams.size() < 6 || ErrorCode::Success != m_pMetadata->RefineMetadata(indices, *p_indexStreams[4], *p_indexStreams[5]))) return ErrorCode::Fail;

    // Rebuild the trees over the surviving vectors, then translate the
    // center IDs into the compacted numbering.
    COMMON::BKTree newTrees(m_pTrees);
    newTrees.BuildTrees<T>(this, &indices);
#pragma omp parallel for
    for (SizeType i = 0; i < newTrees.size(); i++) {
        newTrees[i].centerid = reverseIndices[newTrees[i].centerid];
    }
    // A failed tree write must not be reported as a successful refine.
    if (!newTrees.SaveTrees(*p_indexStreams[1])) return ErrorCode::Fail;

    m_pGraph.RefineGraph<T>(this, indices, reverseIndices, *p_indexStreams[2], &(newTrees.GetSampleMap()));

    // The refined index has no deleted entries, so an empty set is written.
    Helper::Concurrent::ConcurrentSet<SizeType> newDeletedID;
    if (!newDeletedID.save(*p_indexStreams[3])) return ErrorCode::Fail;
    return ErrorCode::Success;
}
template <typename T>
ErrorCode Index<T>::RefineIndex(const std::string& p_folderPath)
{
@ -172,54 +263,40 @@ namespace SPTAG
mkdir(folderPath.c_str());
}
std::lock_guard<std::mutex> lock(m_dataLock);
int newR = GetNumSamples();
std::vector<int> indices;
std::vector<int> reverseIndices(newR);
for (int i = 0; i < newR; i++) {
if (m_deletedID.find(i) == m_deletedID.end()) {
indices.push_back(i);
reverseIndices[i] = i;
}
else {
while (m_deletedID.find(newR - 1) != m_deletedID.end() && newR > i) newR--;
if (newR == i) break;
indices.push_back(newR - 1);
reverseIndices[newR - 1] = i;
newR--;
}
std::vector<std::ostream*> streams;
streams.push_back(new std::ofstream(folderPath + m_sDataPointsFilename, std::ios::binary));
streams.push_back(new std::ofstream(folderPath + m_sBKTFilename, std::ios::binary));
streams.push_back(new std::ofstream(folderPath + m_sGraphFilename, std::ios::binary));
streams.push_back(new std::ofstream(folderPath + m_sDeleteDataPointsFilename, std::ios::binary));
if (nullptr != m_pMetadata)
{
streams.push_back(new std::ofstream(folderPath + m_sMetadataFile, std::ios::binary));
streams.push_back(new std::ofstream(folderPath + m_sMetadataIndexFile, std::ios::binary));
}
std::cout << "Refine... from " << GetNumSamples() << "->" << newR << std::endl;
// Abort when any output file failed to open. Every stream was allocated
// with new, so release them all first; the early return would otherwise
// leak them and leave the files that did open unclosed.
for (size_t i = 0; i < streams.size(); i++)
{
    if (((std::ofstream*)streams[i])->is_open()) continue;
    for (size_t j = 0; j < streams.size(); j++) delete streams[j];
    return ErrorCode::FailedCreateFile;
}
if (false == m_pSamples.Refine(indices, folderPath + m_sDataPointsFilename)) return ErrorCode::FailedCreateFile;
if (nullptr != m_pMetadata && ErrorCode::Success != m_pMetadata->RefineMetadata(indices, folderPath)) return ErrorCode::FailedCreateFile;
ErrorCode ret = RefineIndex(streams);
COMMON::BKTree newTrees(m_pTrees);
newTrees.BuildTrees<T>(this, &indices);
#pragma omp parallel for
for (int i = 0; i < newTrees.size(); i++) {
newTrees[i].centerid = reverseIndices[newTrees[i].centerid];
for (size_t i = 0; i < streams.size(); i++)
{
((std::ofstream*)streams[i])->close();
delete streams[i];
}
newTrees.SaveTrees(folderPath + m_sBKTFilename);
m_pGraph.RefineGraph<T>(this, indices, reverseIndices, folderPath + m_sGraphFilename,
&(newTrees.GetSampleMap()));
return ErrorCode::Success;
return ret;
}
template <typename T>
ErrorCode Index<T>::DeleteIndex(const void* p_vectors, int p_vectorNum) {
ErrorCode Index<T>::DeleteIndex(const void* p_vectors, SizeType p_vectorNum) {
const T* ptr_v = (const T*)p_vectors;
#pragma omp parallel for schedule(dynamic)
for (int i = 0; i < p_vectorNum; i++) {
for (SizeType i = 0; i < p_vectorNum; i++) {
COMMON::QueryResultSet<T> query(ptr_v + i * GetFeatureDim(), m_pGraph.m_iCEF);
SearchIndex(query);
for (int i = 0; i < m_pGraph.m_iCEF; i++) {
if (query.GetResult(i)->Dist < 1e-6) {
std::lock_guard<std::mutex> lock(m_dataLock);
m_deletedID.insert(query.GetResult(i)->VID);
}
}
@ -228,40 +305,43 @@ namespace SPTAG
}
template <typename T>
ErrorCode Index<T>::AddIndex(const void* p_vectors, int p_vectorNum, int p_dimension)
ErrorCode Index<T>::DeleteIndex(const SizeType& p_id) {
// Marks the vector as deleted by recording its ID in the deleted-ID set.
// No range check is performed on p_id here, and the call always reports
// Success.
m_deletedID.insert(p_id);
return ErrorCode::Success;
}
template <typename T>
ErrorCode Index<T>::AddIndex(const void* p_vectors, SizeType p_vectorNum, DimensionType p_dimension, SizeType* p_start)
{
int begin, end;
SizeType begin, end;
{
std::lock_guard<std::mutex> lock(m_dataLock);
if (GetNumSamples() == 0)
return BuildIndex(p_vectors, p_vectorNum, p_dimension);
if (p_dimension != GetFeatureDim())
return ErrorCode::FailedParseValue;
std::lock_guard<std::mutex> lock(m_dataAddLock);
begin = GetNumSamples();
end = GetNumSamples() + p_vectorNum;
m_pSamples.AddBatch((const T*)p_vectors, p_vectorNum);
m_pGraph.AddBatch(p_vectorNum);
if (p_start != nullptr) *p_start = begin;
if (begin == 0) return BuildIndex(p_vectors, p_vectorNum, p_dimension);
if (m_pSamples.R() != end || m_pGraph.R() != end) {
if (p_dimension != GetFeatureDim()) return ErrorCode::FailedParseValue;
if (m_pSamples.AddBatch((const T*)p_vectors, p_vectorNum) != ErrorCode::Success || m_pGraph.AddBatch(p_vectorNum) != ErrorCode::Success) {
std::cout << "Memory Error: Cannot alloc space for vectors" << std::endl;
m_pSamples.SetR(begin);
m_pGraph.SetR(begin);
return ErrorCode::Fail;
return ErrorCode::MemoryOverFlow;
}
if (DistCalcMethod::Cosine == m_iDistCalcMethod)
{
int base = COMMON::Utils::GetBase<T>();
for (int i = begin; i < end; i++) {
for (SizeType i = begin; i < end; i++) {
COMMON::Utils::Normalize((T*)m_pSamples[i], GetFeatureDim(), base);
}
}
}
for (int node = begin; node < end; node++)
for (SizeType node = begin; node < end; node++)
{
m_pGraph.RefineNode<T>(this, node, true);
}
@ -269,47 +349,6 @@ namespace SPTAG
return ErrorCode::Success;
}
template<typename T>
ErrorCode
Index<T>::SaveIndexToMemory(std::vector<void*>& p_indexBlobs, std::vector<int64_t> &p_indexBlobsLen)
{
p_indexBlobs.resize(4);
p_indexBlobsLen.resize(4);
if (!m_pSamples.Save(&p_indexBlobs[0], p_indexBlobsLen[0])) return ErrorCode::Fail;
if (!m_pTrees.SaveTrees(&p_indexBlobs[1], p_indexBlobsLen[1])) return ErrorCode::Fail;
if (!m_pGraph.SaveGraphToMemory(&p_indexBlobs[2], p_indexBlobsLen[2])) return ErrorCode::Fail;
if (ErrorCode::Success != m_pMetadata->SaveMetadataToMemory(&p_indexBlobs[3], p_indexBlobsLen[3]))
return ErrorCode::Fail;
return ErrorCode::Success;
}
template<typename T>
ErrorCode
Index<T>::SaveIndex(const std::string& p_folderPath, std::ofstream& p_configout)
{
m_sDataPointsFilename = "vectors.bin";
m_sBKTFilename = "tree.bin";
m_sGraphFilename = "graph.bin";
#define DefineBKTParameter(VarName, VarType, DefaultValue, RepresentStr) \
p_configout << RepresentStr << "=" << GetParameter(RepresentStr) << std::endl;
#include "inc/Core/BKT/ParameterDefinitionList.h"
#undef DefineBKTParameter
p_configout << std::endl;
if (m_deletedID.size() > 0) {
RefineIndex(p_folderPath);
}
else {
if (!m_pSamples.Save(p_folderPath + m_sDataPointsFilename)) return ErrorCode::Fail;
if (!m_pTrees.SaveTrees(p_folderPath + m_sBKTFilename)) return ErrorCode::Fail;
if (!m_pGraph.SaveGraph(p_folderPath + m_sGraphFilename)) return ErrorCode::Fail;
}
return ErrorCode::Success;
}
template <typename T>
ErrorCode
Index<T>::SetParameter(const char* p_param, const char* p_value)

View File

@ -7,7 +7,7 @@ using namespace SPTAG;
using namespace SPTAG::COMMON;
WorkSpacePool::WorkSpacePool(int p_maxCheck, int p_vectorCount)
WorkSpacePool::WorkSpacePool(int p_maxCheck, SizeType p_vectorCount)
: m_maxCheck(p_maxCheck),
m_vectorCount(p_vectorCount)
{

View File

@ -1,132 +0,0 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
#include "inc/Core/CommonDataStructure.h"
using namespace SPTAG;
const ByteArray ByteArray::c_empty;
ByteArray::ByteArray()
: m_data(nullptr),
m_length(0)
{
}
// Move constructor: takes over the view (pointer and length) and the
// ownership handle of p_right. The source is reset to the empty state so
// it never reports a pointer and length whose backing storage it no longer
// keeps alive.
ByteArray::ByteArray(ByteArray&& p_right)
    : m_data(p_right.m_data),
    m_length(p_right.m_length),
    m_dataHolder(std::move(p_right.m_dataHolder))
{
    p_right.m_data = nullptr;
    p_right.m_length = 0;
}
// Wraps an existing buffer of p_length bytes. When p_transferOnwership is
// true the ByteArray takes ownership and frees the buffer with delete[]
// once the last copy goes away; otherwise it is a non-owning view.
ByteArray::ByteArray(std::uint8_t* p_array, std::size_t p_length, bool p_transferOnwership)
    : m_data(p_array),
    m_length(p_length)
{
    if (!p_transferOnwership)
    {
        return;
    }

    m_dataHolder = std::shared_ptr<std::uint8_t>(m_data, std::default_delete<std::uint8_t[]>());
}
ByteArray::ByteArray(std::uint8_t* p_array, std::size_t p_length, std::shared_ptr<std::uint8_t> p_dataHolder)
: m_data(p_array),
m_length(p_length),
m_dataHolder(std::move(p_dataHolder))
{
}
ByteArray::ByteArray(const ByteArray& p_right)
: m_data(p_right.m_data),
m_length(p_right.m_length),
m_dataHolder(p_right.m_dataHolder)
{
}
ByteArray&
ByteArray::operator= (const ByteArray& p_right)
{
m_data = p_right.m_data;
m_length = p_right.m_length;
m_dataHolder = p_right.m_dataHolder;
return *this;
}
// Move assignment: takes over the view and the ownership handle of
// p_right and resets the source to the empty state, so the moved-from
// object does not keep a pointer and length it no longer owns.
// Self-assignment is a no-op.
ByteArray&
ByteArray::operator= (ByteArray&& p_right)
{
    if (this == &p_right)
    {
        return *this;
    }

    m_data = p_right.m_data;
    m_length = p_right.m_length;
    m_dataHolder = std::move(p_right.m_dataHolder);

    p_right.m_data = nullptr;
    p_right.m_length = 0;
    return *this;
}
ByteArray::~ByteArray()
{
}
// Allocates an owning ByteArray of p_length bytes (contents are left
// uninitialized). A zero length yields an empty ByteArray with no storage.
ByteArray
ByteArray::Alloc(std::size_t p_length)
{
    ByteArray result;
    if (0 != p_length)
    {
        // Array deleter so the storage is released with delete[].
        std::shared_ptr<std::uint8_t> storage(new std::uint8_t[p_length],
                                              std::default_delete<std::uint8_t[]>());
        result.m_data = storage.get();
        result.m_length = p_length;
        result.m_dataHolder = std::move(storage);
    }

    return result;
}
std::uint8_t*
ByteArray::Data() const
{
return m_data;
}
std::size_t
ByteArray::Length() const
{
return m_length;
}
void
ByteArray::SetData(std::uint8_t* p_array, std::size_t p_length)
{
m_data = p_array;
m_length = p_length;
}
std::shared_ptr<std::uint8_t>
ByteArray::DataHolder() const
{
return m_dataHolder;
}
// Returns the object to the empty state: drops this object's reference to
// the owned storage (if any) and zeroes the view.
void
ByteArray::Clear()
{
    m_dataHolder.reset();
    m_length = 0;
    m_data = nullptr;
}

View File

@ -13,22 +13,7 @@ namespace SPTAG
namespace KDT
{
template <typename T>
ErrorCode Index<T>::LoadIndexFromMemory(const std::vector<void*>& p_indexBlobs)
{
if (!m_pSamples.Load((char*)p_indexBlobs[0])) return ErrorCode::FailedParseValue;
if (!m_pTrees.LoadTrees((char*)p_indexBlobs[1])) return ErrorCode::FailedParseValue;
if (!m_pGraph.LoadGraphFromMemory((char*)p_indexBlobs[2])) return ErrorCode::FailedParseValue;
m_pMetadata = std::make_shared<MemMetadataSet>();
if (ErrorCode::Success != m_pMetadata->LoadMetadataFromMemory((char*)p_indexBlobs[3]))
return ErrorCode::FailedParseValue;
m_workSpacePool.reset(new COMMON::WorkSpacePool(m_iMaxCheck, GetNumSamples()));
m_workSpacePool->Init(m_iNumberOfThreads);
return ErrorCode::Success;
}
template <typename T>
ErrorCode Index<T>::LoadIndex(const std::string& p_folderPath, Helper::IniReader& p_reader)
ErrorCode Index<T>::LoadConfig(Helper::IniReader& p_reader)
{
#define DefineKDTParameter(VarName, VarType, DefaultValue, RepresentStr) \
SetParameter(RepresentStr, \
@ -38,35 +23,96 @@ namespace SPTAG
#include "inc/Core/KDT/ParameterDefinitionList.h"
#undef DefineKDTParameter
return ErrorCode::Success;
}
if (!m_pSamples.Load(p_folderPath + m_sDataPointsFilename)) return ErrorCode::Fail;
if (!m_pTrees.LoadTrees(p_folderPath + m_sKDTFilename)) return ErrorCode::Fail;
if (!m_pGraph.LoadGraph(p_folderPath + m_sGraphFilename)) return ErrorCode::Fail;
// Restores the index from in-memory blobs.
// Blob order: [0] samples, [1] trees, [2] graph, [3] (optional) deleted IDs.
// Returns LackOfInputs when fewer than three blobs are supplied and
// FailedParseValue when any blob fails to load; on success the search
// workspace pool is rebuilt for the loaded sample count.
template <typename T>
ErrorCode Index<T>::LoadIndexDataFromMemory(const std::vector<ByteArray>& p_indexBlobs)
{
    const auto blobCount = p_indexBlobs.size();
    if (blobCount < 3) return ErrorCode::LackOfInputs;

    // Short-circuit evaluation keeps the original load order and stops at
    // the first failure.
    const bool parsed =
        m_pSamples.Load((char*)p_indexBlobs[0].Data()) &&
        m_pTrees.LoadTrees((char*)p_indexBlobs[1].Data()) &&
        m_pGraph.LoadGraph((char*)p_indexBlobs[2].Data()) &&
        (blobCount <= 3 || m_deletedID.load((char*)p_indexBlobs[3].Data()));
    if (!parsed) return ErrorCode::FailedParseValue;

    m_workSpacePool.reset(new COMMON::WorkSpacePool(m_iMaxCheck, GetNumSamples()));
    m_workSpacePool->Init(m_iNumberOfThreads);
    return ErrorCode::Success;
}
// Loads samples, trees, graph and the deleted-ID set from files inside
// p_folderPath (file names come from the index's filename members), then
// rebuilds the search workspace pool. Returns Fail as soon as any of the
// four files cannot be loaded.
template <typename T>
ErrorCode Index<T>::LoadIndexData(const std::string& p_folderPath)
{
    // Short-circuit evaluation keeps the original load order.
    const bool loaded =
        m_pSamples.Load(p_folderPath + m_sDataPointsFilename) &&
        m_pTrees.LoadTrees(p_folderPath + m_sKDTFilename) &&
        m_pGraph.LoadGraph(p_folderPath + m_sGraphFilename) &&
        m_deletedID.load(p_folderPath + m_sDeleteDataPointsFilename);
    if (!loaded) return ErrorCode::Fail;

    m_workSpacePool.reset(new COMMON::WorkSpacePool(m_iMaxCheck, GetNumSamples()));
    m_workSpacePool->Init(m_iNumberOfThreads);
    return ErrorCode::Success;
}
// Writes every KDT parameter to p_configOut as "name=value", one per line,
// followed by a blank line. Always returns ErrorCode::Success.
template<typename T>
ErrorCode Index<T>::SaveConfig(std::ostream& p_configOut) const
{
// X-macro: the parameter list header invokes DefineKDTParameter once per
// parameter, so the include below expands to one output statement each.
#define DefineKDTParameter(VarName, VarType, DefaultValue, RepresentStr) \
p_configOut << RepresentStr << "=" << GetParameter(RepresentStr) << std::endl;
#include "inc/Core/KDT/ParameterDefinitionList.h"
#undef DefineKDTParameter
// Blank line terminates the parameter section.
p_configOut << std::endl;
return ErrorCode::Success;
}
// Saves samples, trees, graph and the deleted-ID set to files inside
// p_folderPath. Holds the add lock and a shared lock on the deleted-ID set
// for the duration so the four files are written from one state.
// Returns Fail as soon as any component fails to save.
template<typename T>
ErrorCode Index<T>::SaveIndexData(const std::string& p_folderPath)
{
    std::lock_guard<std::mutex> lock(m_dataAddLock);
    std::shared_lock<std::shared_timed_mutex> sharedlock(m_deletedID.getLock());

    // Short-circuit evaluation keeps the original save order.
    const bool saved =
        m_pSamples.Save(p_folderPath + m_sDataPointsFilename) &&
        m_pTrees.SaveTrees(p_folderPath + m_sKDTFilename) &&
        m_pGraph.SaveGraph(p_folderPath + m_sGraphFilename) &&
        m_deletedID.save(p_folderPath + m_sDeleteDataPointsFilename);
    return saved ? ErrorCode::Success : ErrorCode::Fail;
}
// Saves the index to caller-supplied streams.
// Stream order: [0] samples, [1] trees, [2] graph, [3] deleted IDs.
// Returns LackOfInputs when fewer than four streams are given and Fail as
// soon as any component fails to save.
template<typename T>
ErrorCode Index<T>::SaveIndexData(const std::vector<std::ostream*>& p_indexStreams)
{
    if (p_indexStreams.size() < 4) return ErrorCode::LackOfInputs;

    std::lock_guard<std::mutex> lock(m_dataAddLock);
    std::shared_lock<std::shared_timed_mutex> sharedlock(m_deletedID.getLock());

    // Short-circuit evaluation keeps the original save order.
    const bool saved =
        m_pSamples.Save(*p_indexStreams[0]) &&
        m_pTrees.SaveTrees(*p_indexStreams[1]) &&
        m_pGraph.SaveGraph(*p_indexStreams[2]) &&
        m_deletedID.save(*p_indexStreams[3]);
    return saved ? ErrorCode::Success : ErrorCode::Fail;
}
#pragma region K-NN search
#define Search(CheckDeleted1) \
m_pTrees.InitSearchTrees(this, p_query, p_space, m_iNumberOfInitialDynamicPivots); \
while (!p_space.m_NGQueue.empty()) { \
COMMON::HeapCell gnode = p_space.m_NGQueue.pop(); \
const int *node = m_pGraph[gnode.node]; \
const SizeType *node = m_pGraph[gnode.node]; \
_mm_prefetch((const char *)node, _MM_HINT_T0); \
CheckDeleted1 { \
if (!p_query.AddPoint(gnode.node, gnode.distance) && p_space.m_iNumberOfCheckedLeaves > p_space.m_iMaxCheck) { \
p_query.SortResult(); return; \
} \
} \
for (int i = 0; i < m_pGraph.m_iNeighborhoodSize; i++) \
for (DimensionType i = 0; i < m_pGraph.m_iNeighborhoodSize; i++) \
_mm_prefetch((const char *)(m_pSamples)[node[i]], _MM_HINT_T0); \
float upperBound = max(p_query.worstDist(), gnode.distance); \
bool bLocalOpt = true; \
for (int i = 0; i < m_pGraph.m_iNeighborhoodSize; i++) { \
int nn_index = node[i]; \
for (DimensionType i = 0; i < m_pGraph.m_iNeighborhoodSize; i++) { \
SizeType nn_index = node[i]; \
if (nn_index < 0) break; \
if (p_space.CheckAndSet(nn_index)) continue; \
float distance2leaf = m_fComputeDistance(p_query.GetTarget(), (m_pSamples)[nn_index], GetFeatureDim()); \
@ -87,9 +133,9 @@ namespace SPTAG
p_query.SortResult(); \
template <typename T>
void Index<T>::SearchIndexWithDeleted(COMMON::QueryResultSet<T> &p_query, COMMON::WorkSpace &p_space, const tbb::concurrent_unordered_set<int> &p_deleted) const
void Index<T>::SearchIndexWithDeleted(COMMON::QueryResultSet<T> &p_query, COMMON::WorkSpace &p_space, const Helper::Concurrent::ConcurrentSet<SizeType> &p_deleted) const
{
Search(if (p_deleted.find(gnode.node) == p_deleted.end()))
Search(if (!p_deleted.contains(gnode.node)))
}
template <typename T>
@ -116,7 +162,7 @@ namespace SPTAG
{
for (int i = 0; i < p_query.GetResultNum(); ++i)
{
int result = p_query.GetResult(i)->VID;
SizeType result = p_query.GetResult(i)->VID;
p_query.SetMetadata(i, (result < 0) ? ByteArray::c_empty : m_pMetadata->GetMetadata(result));
}
}
@ -125,7 +171,7 @@ namespace SPTAG
#pragma endregion
template <typename T>
ErrorCode Index<T>::BuildIndex(const void* p_data, int p_vectorNum, int p_dimension)
ErrorCode Index<T>::BuildIndex(const void* p_data, SizeType p_vectorNum, DimensionType p_dimension)
{
omp_set_num_threads(m_iNumberOfThreads);
@ -135,7 +181,7 @@ namespace SPTAG
{
int base = COMMON::Utils::GetBase<T>();
#pragma omp parallel for
for (int i = 0; i < GetNumSamples(); i++) {
for (SizeType i = 0; i < GetNumSamples(); i++) {
COMMON::Utils::Normalize(m_pSamples[i], GetFeatureDim(), base);
}
}
@ -145,7 +191,54 @@ namespace SPTAG
m_pTrees.BuildTrees<T>(this);
m_pGraph.BuildGraph<T>(this);
return ErrorCode::Success;
}
// Serializes a compacted copy of the index that no longer contains the
// samples listed in m_deletedID.
//
// p_indexStreams layout:
//   [0] samples, [1] KD-trees, [2] graph, [3] deleted-id set (written empty),
//   [4] metadata and [5] metadata index (both required when metadata is attached).
//
// Returns ErrorCode::LackOfInputs when fewer than four streams are supplied,
// ErrorCode::Fail when a component cannot be written (or metadata is attached
// but its two streams are missing), ErrorCode::Success otherwise.
template <typename T>
ErrorCode Index<T>::RefineIndex(const std::vector<std::ostream*>& p_indexStreams)
{
    // Streams [0]..[3] are dereferenced unconditionally below, so reject short
    // inputs up front (same guard as SaveIndexData).
    if (p_indexStreams.size() < 4) return ErrorCode::LackOfInputs;

    std::lock_guard<std::mutex> lock(m_dataAddLock);
    std::shared_lock<std::shared_timed_mutex> sharedlock(m_deletedID.getLock());

    // indices[newId] = oldId, reverseIndices[oldId] = newId.
    // A surviving sample keeps its slot; a deleted slot is filled with the
    // last surviving sample, shrinking newR from the tail.
    SizeType newR = GetNumSamples();
    std::vector<SizeType> indices;
    std::vector<SizeType> reverseIndices(newR);
    for (SizeType i = 0; i < newR; i++) {
        if (!m_deletedID.contains(i)) {
            indices.push_back(i);
            reverseIndices[i] = i;
        }
        else {
            // Skip deleted samples at the tail; the bound is tested first so
            // the set is never queried for an id below i.
            while (newR > i && m_deletedID.contains(newR - 1)) newR--;
            if (newR == i) break;
            indices.push_back(newR - 1);
            reverseIndices[newR - 1] = i;
            newR--;
        }
    }

    std::cout << "Refine... from " << GetNumSamples() << "->" << newR << std::endl;

    if (false == m_pSamples.Refine(indices, *p_indexStreams[0])) return ErrorCode::Fail;
    if (nullptr != m_pMetadata && (p_indexStreams.size() < 6 || ErrorCode::Success != m_pMetadata->RefineMetadata(indices, *p_indexStreams[4], *p_indexStreams[5]))) return ErrorCode::Fail;

    m_pGraph.RefineGraph<T>(this, indices, reverseIndices, *p_indexStreams[2]);

    // Rebuild the trees over the surviving samples, then translate the
    // negative child links (which encode a sample id as -id - 1) to the new ids.
    COMMON::KDTree newTrees(m_pTrees);
    newTrees.BuildTrees<T>(this, &indices);
#pragma omp parallel for
    for (SizeType i = 0; i < newTrees.size(); i++) {
        if (newTrees[i].left < 0)
            newTrees[i].left = -reverseIndices[-newTrees[i].left - 1] - 1;
        if (newTrees[i].right < 0)
            newTrees[i].right = -reverseIndices[-newTrees[i].right - 1] - 1;
    }
    // Report write failures instead of returning Success with a truncated index.
    if (!newTrees.SaveTrees(*p_indexStreams[1])) return ErrorCode::Fail;

    // The refined index has no pending deletions: persist an empty set.
    Helper::Concurrent::ConcurrentSet<SizeType> newDeletedID;
    if (!newDeletedID.save(*p_indexStreams[3])) return ErrorCode::Fail;
    return ErrorCode::Success;
}
@ -163,56 +256,40 @@ namespace SPTAG
mkdir(folderPath.c_str());
}
std::lock_guard<std::mutex> lock(m_dataLock);
int newR = GetNumSamples();
std::vector<int> indices;
std::vector<int> reverseIndices(newR);
for (int i = 0; i < newR; i++) {
if (m_deletedID.find(i) == m_deletedID.end()) {
indices.push_back(i);
reverseIndices[i] = i;
}
else {
while (m_deletedID.find(newR - 1) != m_deletedID.end() && newR > i) newR--;
if (newR == i) break;
indices.push_back(newR - 1);
reverseIndices[newR - 1] = i;
newR--;
}
std::vector<std::ostream*> streams;
streams.push_back(new std::ofstream(folderPath + m_sDataPointsFilename, std::ios::binary));
streams.push_back(new std::ofstream(folderPath + m_sKDTFilename, std::ios::binary));
streams.push_back(new std::ofstream(folderPath + m_sGraphFilename, std::ios::binary));
streams.push_back(new std::ofstream(folderPath + m_sDeleteDataPointsFilename, std::ios::binary));
if (nullptr != m_pMetadata)
{
streams.push_back(new std::ofstream(folderPath + m_sMetadataFile, std::ios::binary));
streams.push_back(new std::ofstream(folderPath + m_sMetadataIndexFile, std::ios::binary));
}
std::cout << "Refine... from " << GetNumSamples() << "->" << newR << std::endl;
for (size_t i = 0; i < streams.size(); i++)
if (!(((std::ofstream*)streams[i])->is_open())) return ErrorCode::FailedCreateFile;
if (false == m_pSamples.Refine(indices, folderPath + m_sDataPointsFilename)) return ErrorCode::FailedCreateFile;
if (nullptr != m_pMetadata && ErrorCode::Success != m_pMetadata->RefineMetadata(indices, folderPath)) return ErrorCode::FailedCreateFile;
m_pGraph.RefineGraph<T>(this, indices, reverseIndices, folderPath + m_sGraphFilename);
COMMON::KDTree newTrees(m_pTrees);
newTrees.BuildTrees<T>(this, &indices);
#pragma omp parallel for
for (int i = 0; i < newTrees.size(); i++) {
if (newTrees[i].left < 0)
newTrees[i].left = -reverseIndices[-newTrees[i].left - 1] - 1;
if (newTrees[i].right < 0)
newTrees[i].right = -reverseIndices[-newTrees[i].right - 1] - 1;
ErrorCode ret = RefineIndex(streams);
for (size_t i = 0; i < streams.size(); i++)
{
((std::ofstream*)streams[i])->close();
delete streams[i];
}
newTrees.SaveTrees(folderPath + m_sKDTFilename);
return ErrorCode::Success;
return ret;
}
template <typename T>
ErrorCode Index<T>::DeleteIndex(const void* p_vectors, int p_vectorNum) {
ErrorCode Index<T>::DeleteIndex(const void* p_vectors, SizeType p_vectorNum) {
const T* ptr_v = (const T*)p_vectors;
#pragma omp parallel for schedule(dynamic)
for (int i = 0; i < p_vectorNum; i++) {
for (SizeType i = 0; i < p_vectorNum; i++) {
COMMON::QueryResultSet<T> query(ptr_v + i * GetFeatureDim(), m_pGraph.m_iCEF);
SearchIndex(query);
for (int i = 0; i < m_pGraph.m_iCEF; i++) {
if (query.GetResult(i)->Dist < 1e-6) {
std::lock_guard<std::mutex> lock(m_dataLock);
m_deletedID.insert(query.GetResult(i)->VID);
}
}
@ -221,40 +298,43 @@ namespace SPTAG
}
template <typename T>
ErrorCode Index<T>::AddIndex(const void* p_vectors, int p_vectorNum, int p_dimension)
ErrorCode Index<T>::DeleteIndex(const SizeType& p_id) {
m_deletedID.insert(p_id);
return ErrorCode::Success;
}
template <typename T>
ErrorCode Index<T>::AddIndex(const void* p_vectors, SizeType p_vectorNum, DimensionType p_dimension, SizeType* p_start)
{
int begin, end;
SizeType begin, end;
{
std::lock_guard<std::mutex> lock(m_dataLock);
if (GetNumSamples() == 0)
return BuildIndex(p_vectors, p_vectorNum, p_dimension);
if (p_dimension != GetFeatureDim())
return ErrorCode::FailedParseValue;
std::lock_guard<std::mutex> lock(m_dataAddLock);
begin = GetNumSamples();
end = GetNumSamples() + p_vectorNum;
m_pSamples.AddBatch((const T*)p_vectors, p_vectorNum);
m_pGraph.AddBatch(p_vectorNum);
if (p_start != nullptr) *p_start = begin;
if (m_pSamples.R() != end || m_pGraph.R() != end) {
if (begin == 0) return BuildIndex(p_vectors, p_vectorNum, p_dimension);
if (p_dimension != GetFeatureDim()) return ErrorCode::FailedParseValue;
if (m_pSamples.AddBatch((const T*)p_vectors, p_vectorNum) != ErrorCode::Success || m_pGraph.AddBatch(p_vectorNum) != ErrorCode::Success) {
std::cout << "Memory Error: Cannot alloc space for vectors" << std::endl;
m_pSamples.SetR(begin);
m_pGraph.SetR(begin);
return ErrorCode::Fail;
return ErrorCode::MemoryOverFlow;
}
if (DistCalcMethod::Cosine == m_iDistCalcMethod)
{
int base = COMMON::Utils::GetBase<T>();
for (int i = begin; i < end; i++) {
for (SizeType i = begin; i < end; i++) {
COMMON::Utils::Normalize((T*)m_pSamples[i], GetFeatureDim(), base);
}
}
}
for (int node = begin; node < end; node++)
for (SizeType node = begin; node < end; node++)
{
m_pGraph.RefineNode<T>(this, node, true);
}
@ -262,47 +342,6 @@ namespace SPTAG
return ErrorCode::Success;
}
template<typename T>
ErrorCode
Index<T>::SaveIndexToMemory(std::vector<void*>& p_indexBlobs, std::vector<int64_t> &p_indexBlobsLen)
{
p_indexBlobs.resize(4);
p_indexBlobsLen.resize(4);
if (!m_pSamples.Save(&p_indexBlobs[0], p_indexBlobsLen[0])) return ErrorCode::Fail;
if (!m_pTrees.SaveTrees(&p_indexBlobs[1], p_indexBlobsLen[1])) return ErrorCode::Fail;
if (!m_pGraph.SaveGraphToMemory(&p_indexBlobs[2], p_indexBlobsLen[2])) return ErrorCode::Fail;
if (ErrorCode::Success != m_pMetadata->SaveMetadataToMemory(&p_indexBlobs[3], p_indexBlobsLen[3]))
return ErrorCode::Fail;
return ErrorCode::Success;
}
template<typename T>
ErrorCode
Index<T>::SaveIndex(const std::string& p_folderPath, std::ofstream& p_configout)
{
m_sDataPointsFilename = "vectors.bin";
m_sKDTFilename = "tree.bin";
m_sGraphFilename = "graph.bin";
#define DefineKDTParameter(VarName, VarType, DefaultValue, RepresentStr) \
p_configout << RepresentStr << "=" << GetParameter(RepresentStr) << std::endl;
#include "inc/Core/KDT/ParameterDefinitionList.h"
#undef DefineKDTParameter
p_configout << std::endl;
if (m_deletedID.size() > 0) {
RefineIndex(p_folderPath);
}
else {
if (!m_pSamples.Save(p_folderPath + m_sDataPointsFilename)) return ErrorCode::Fail;
if (!m_pTrees.SaveTrees(p_folderPath + m_sKDTFilename)) return ErrorCode::Fail;
if (!m_pGraph.SaveGraph(p_folderPath + m_sGraphFilename)) return ErrorCode::Fail;
}
return ErrorCode::Success;
}
template <typename T>
ErrorCode
Index<T>::SetParameter(const char* p_param, const char* p_value)

View File

@ -5,68 +5,43 @@
#include <fstream>
#include <iostream>
#include <cstring>
using namespace SPTAG;
ErrorCode
MetadataSet::RefineMetadata(std::vector<int>& indices, const std::string& p_folderPath)
MetadataSet::RefineMetadata(std::vector<SizeType>& indices, std::ostream& p_metaOut, std::ostream& p_metaIndexOut)
{
std::ofstream metaOut(p_folderPath + "metadata.bin_tmp", std::ios::binary);
std::ofstream metaIndexOut(p_folderPath + "metadataIndex.bin", std::ios::binary);
if (!metaOut.is_open() || !metaIndexOut.is_open()) return ErrorCode::FailedCreateFile;
int R = (int)indices.size();
metaIndexOut.write((char*)&R, sizeof(int));
SizeType R = (SizeType)indices.size();
p_metaIndexOut.write((char*)&R, sizeof(SizeType));
std::uint64_t offset = 0;
for (int i = 0; i < R; i++) {
metaIndexOut.write((char*)&offset, sizeof(std::uint64_t));
for (SizeType i = 0; i < R; i++) {
p_metaIndexOut.write((char*)&offset, sizeof(std::uint64_t));
ByteArray meta = GetMetadata(indices[i]);
metaOut.write((char*)meta.Data(), sizeof(uint8_t)*meta.Length());
p_metaOut.write((char*)meta.Data(), sizeof(uint8_t)*meta.Length());
offset += meta.Length();
}
p_metaIndexOut.write((char*)&offset, sizeof(std::uint64_t));
return ErrorCode::Success;
}
ErrorCode
MetadataSet::RefineMetadata(std::vector<SizeType>& indices, const std::string& p_metaFile, const std::string& p_metaindexFile)
{
std::ofstream metaOut(p_metaFile + "_tmp", std::ios::binary);
std::ofstream metaIndexOut(p_metaindexFile, std::ios::binary);
if (!metaOut.is_open() || !metaIndexOut.is_open()) return ErrorCode::FailedCreateFile;
RefineMetadata(indices, metaOut, metaIndexOut);
metaOut.close();
metaIndexOut.write((char*)&offset, sizeof(std::uint64_t));
metaIndexOut.close();
SPTAG::MetadataSet::MetaCopy(p_folderPath + "metadata.bin_tmp", p_folderPath + "metadata.bin");
if (fileexists(p_metaFile.c_str())) std::remove(p_metaFile.c_str());
std::rename((p_metaFile + "_tmp").c_str(), p_metaFile.c_str());
return ErrorCode::Success;
}
ErrorCode
MetadataSet::MetaCopy(const std::string& p_src, const std::string& p_dst)
{
if (p_src == p_dst) return ErrorCode::Success;
std::ifstream src(p_src, std::ios::binary);
if (!src.is_open())
{
std::cerr << "ERROR: Can't open " << p_src << std::endl;
return ErrorCode::FailedOpenFile;
}
std::ofstream dst(p_dst, std::ios::binary);
if (!dst.is_open())
{
std::cerr << "ERROR: Can't create " << p_dst << std::endl;
src.close();
return ErrorCode::FailedCreateFile;
}
int bufsize = 1000000;
char* buf = new char[bufsize];
while (!src.eof()) {
src.read(buf, bufsize);
dst.write(buf, src.gcount());
}
delete[] buf;
src.close();
dst.close();
return ErrorCode::Success;
}
MetadataSet::MetadataSet()
{
}
@ -107,19 +82,19 @@ FileMetadataSet::~FileMetadataSet()
ByteArray
FileMetadataSet::GetMetadata(IndexType p_vectorID) const
FileMetadataSet::GetMetadata(SizeType p_vectorID) const
{
std::uint64_t startoff = m_pOffsets[p_vectorID];
std::uint64_t bytes = m_pOffsets[p_vectorID + 1] - startoff;
if (p_vectorID < (IndexType)m_count) {
if (p_vectorID < m_count) {
m_fp->seekg(startoff, std::ios_base::beg);
ByteArray b = ByteArray::Alloc((SizeType)bytes);
ByteArray b = ByteArray::Alloc(bytes);
m_fp->read((char*)b.Data(), bytes);
return b;
}
else {
startoff -= m_pOffsets[m_count];
return ByteArray((std::uint8_t*)m_newdata.data() + startoff, static_cast<SizeType>(bytes), false);
return ByteArray((std::uint8_t*)m_newdata.data() + startoff, bytes, false);
}
}
@ -138,10 +113,18 @@ FileMetadataSet::Available() const
}
// Returns (metadata bytes, metadata-index bytes):
//   first  - the last entry of the offset table,
//   second - one SizeType plus one 64-bit entry per offset-table element.
std::pair<std::uint64_t, std::uint64_t>
FileMetadataSet::BufferSize() const
{
    const std::uint64_t dataBytes = m_pOffsets[m_pOffsets.size() - 1];
    const std::uint64_t indexBytes = sizeof(SizeType) + sizeof(std::uint64_t) * m_pOffsets.size();
    return std::make_pair(dataBytes, indexBytes);
}
void
FileMetadataSet::AddBatch(MetadataSet& data)
{
for (int i = 0; i < static_cast<int>(data.Count()); i++)
for (SizeType i = 0; i < data.Count(); i++)
{
ByteArray newdata = data.GetMetadata(i);
m_newdata.insert(m_newdata.end(), newdata.Data(), newdata.Data() + newdata.Length());
@ -150,45 +133,52 @@ FileMetadataSet::AddBatch(MetadataSet& data)
}
// Writes the complete metadata set to the given streams.
//   p_metaOut      - receives the bytes of the backing file (m_fp) followed
//                    by any not-yet-persisted data in m_newdata.
//   p_metaIndexOut - receives Count() as a SizeType followed by the offset table.
// Always returns ErrorCode::Success.
ErrorCode
FileMetadataSet::SaveMetadata(std::ostream& p_metaOut, std::ostream& p_metaIndexOut)
{
    // A previous read that hit end-of-file leaves failbit set; with failbit
    // set seekg() does nothing, so reset the state before rewinding.
    m_fp->clear();
    m_fp->seekg(0, std::ios_base::beg);

    // Copy in fixed-size chunks. Looping on the read result (rather than
    // on eof()) terminates even when the stream fails without reaching EOF.
    std::vector<char> buf(1000000);
    while (m_fp->read(buf.data(), static_cast<std::streamsize>(buf.size())) || m_fp->gcount() > 0) {
        p_metaOut.write(buf.data(), m_fp->gcount());
    }
    // The final short read set eofbit|failbit; clear them so later
    // seekg()/read() calls on m_fp keep working.
    m_fp->clear();

    if (m_newdata.size() > 0) {
        p_metaOut.write((char*)m_newdata.data(), m_newdata.size());
    }

    SizeType count = Count();
    p_metaIndexOut.write((char*)&count, sizeof(SizeType));
    p_metaIndexOut.write((char*)m_pOffsets.data(), sizeof(std::uint64_t) * m_pOffsets.size());
    return ErrorCode::Success;
}
ErrorCode
FileMetadataSet::SaveMetadata(const std::string& p_metaFile, const std::string& p_metaindexFile)
{
ErrorCode ret = ErrorCode::Success;
std::ofstream metaOut(p_metaFile + "_tmp", std::ios::binary);
std::ofstream metaIndexOut(p_metaindexFile, std::ios::binary);
if (!metaOut.is_open() || !metaIndexOut.is_open()) return ErrorCode::FailedCreateFile;
SaveMetadata(metaOut, metaIndexOut);
metaOut.close();
metaIndexOut.close();
m_fp->close();
ret = MetaCopy(m_metaFile, p_metaFile);
if (ErrorCode::Success != ret)
{
return ret;
}
if (m_newdata.size() > 0) {
std::ofstream tmpout(p_metaFile, std::ofstream::app|std::ios::binary);
if (!tmpout.is_open()) return ErrorCode::FailedOpenFile;
tmpout.write((char*)m_newdata.data(), m_newdata.size());
tmpout.close();
}
if (fileexists(p_metaFile.c_str())) std::remove(p_metaFile.c_str());
std::rename((p_metaFile + "_tmp").c_str(), p_metaFile.c_str());
m_fp->open(p_metaFile, std::ifstream::binary);
std::ofstream dst(p_metaindexFile, std::ios::binary);
m_count = static_cast<int>(m_pOffsets.size()) - 1;
m_count = Count();
m_newdata.clear();
dst.write((char*)&m_count, sizeof(m_count));
dst.write((char*)m_pOffsets.data(), sizeof(std::uint64_t) * m_pOffsets.size());
return ret;
return ErrorCode::Success;
}
ErrorCode
FileMetadataSet::SaveMetadataToMemory(void **pGraphMemFile, int64_t &len) {
// TODO(lxj): serialize file to mem?
return ErrorCode::Fail;
}
ErrorCode
FileMetadataSet::LoadMetadataFromMemory(void *pGraphMemFile) {
// TODO(lxj): not support yet
return ErrorCode::Fail;
}
MemMetadataSet::MemMetadataSet(ByteArray p_metadata, ByteArray p_offsets, SizeType p_count)
: m_metadataHolder(std::move(p_metadata)),
m_offsetHolder(std::move(p_offsets)),
@ -205,17 +195,17 @@ MemMetadataSet::~MemMetadataSet()
ByteArray
MemMetadataSet::GetMetadata(IndexType p_vectorID) const
MemMetadataSet::GetMetadata(SizeType p_vectorID) const
{
if (static_cast<SizeType>(p_vectorID) < m_count)
if (p_vectorID < m_count)
{
return ByteArray(m_metadataHolder.Data() + m_offsets[p_vectorID],
static_cast<SizeType>(m_offsets[p_vectorID + 1] - m_offsets[p_vectorID]),
m_metadataHolder.DataHolder());
m_offsets[p_vectorID + 1] - m_offsets[p_vectorID],
false);
}
else if (p_vectorID < m_offsets.size() - 1) {
else if (p_vectorID < (SizeType)(m_offsets.size() - 1)) {
return ByteArray((std::uint8_t*)m_newdata.data() + m_offsets[p_vectorID] - m_offsets[m_count],
static_cast<SizeType>(m_offsets[p_vectorID + 1] - m_offsets[p_vectorID]),
m_offsets[p_vectorID + 1] - m_offsets[p_vectorID],
false);
}
@ -226,7 +216,7 @@ MemMetadataSet::GetMetadata(IndexType p_vectorID) const
SizeType
MemMetadataSet::Count() const
{
return m_count;
return static_cast<SizeType>(m_offsets.size() - 1);
}
@ -236,10 +226,18 @@ MemMetadataSet::Available() const
return m_metadataHolder.Length() > 0 && m_offsetHolder.Length() > 0;
}
// Returns (metadata bytes, metadata-index bytes):
//   first  - the last entry of the offset table,
//   second - one SizeType plus one 64-bit entry per offset-table element.
std::pair<std::uint64_t, std::uint64_t>
MemMetadataSet::BufferSize() const
{
    const std::uint64_t dataBytes = m_offsets[m_offsets.size() - 1];
    const std::uint64_t indexBytes = sizeof(SizeType) + sizeof(std::uint64_t) * m_offsets.size();
    return std::make_pair(dataBytes, indexBytes);
}
void
MemMetadataSet::AddBatch(MetadataSet& data)
{
for (int i = 0; i < static_cast<int>(data.Count()); i++)
for (SizeType i = 0; i < data.Count(); i++)
{
ByteArray newdata = data.GetMetadata(i);
m_newdata.insert(m_newdata.end(), newdata.Data(), newdata.Data() + newdata.Length());
@ -247,83 +245,36 @@ MemMetadataSet::AddBatch(MetadataSet& data)
}
}
// Writes the complete metadata set to the given streams.
//   p_metaOut      - receives the bytes held by m_metadataHolder followed
//                    by any appended data in m_newdata.
//   p_metaIndexOut - receives Count() as a SizeType followed by the offset table.
// Always returns ErrorCode::Success.
ErrorCode
MemMetadataSet::SaveMetadata(std::ostream& p_metaOut, std::ostream& p_metaIndexOut)
{
    const char* heldBytes = reinterpret_cast<const char*>(m_metadataHolder.Data());
    p_metaOut.write(heldBytes, m_metadataHolder.Length());

    if (!m_newdata.empty())
    {
        p_metaOut.write((char*)m_newdata.data(), m_newdata.size());
    }

    // Index stream: element count first, then the raw offset table.
    SizeType total = Count();
    p_metaIndexOut.write((char*)&total, sizeof(SizeType));

    const std::size_t tableBytes = sizeof(std::uint64_t) * m_offsets.size();
    p_metaIndexOut.write((char*)m_offsets.data(), tableBytes);

    return ErrorCode::Success;
}
ErrorCode
MemMetadataSet::SaveMetadata(const std::string& p_metaFile, const std::string& p_metaindexFile)
{
std::ofstream outputStream;
outputStream.open(p_metaFile, std::ios::binary);
if (!outputStream.is_open())
{
std::cerr << "Error: Failed to create file " << p_metaFile << "." << std::endl;
return ErrorCode::FailedCreateFile;
}
std::ofstream metaOut(p_metaFile + "_tmp", std::ios::binary);
std::ofstream metaIndexOut(p_metaindexFile, std::ios::binary);
if (!metaOut.is_open() || !metaIndexOut.is_open()) return ErrorCode::FailedCreateFile;
outputStream.write(reinterpret_cast<const char*>(m_metadataHolder.Data()), m_metadataHolder.Length());
outputStream.write((const char*)m_newdata.data(), sizeof(std::uint8_t)*m_newdata.size());
outputStream.close();
outputStream.open(p_metaindexFile, std::ios::binary);
if (!outputStream.is_open())
{
std::cerr << "Error: Failed to create file " << p_metaindexFile << "." << std::endl;
return ErrorCode::FailedCreateFile;
}
m_count = static_cast<int>(m_offsets.size()) - 1;
outputStream.write(reinterpret_cast<const char*>(&m_count), sizeof(m_count));
outputStream.write(reinterpret_cast<const char*>(m_offsets.data()), sizeof(std::uint64_t)*m_offsets.size());
outputStream.close();
SaveMetadata(metaOut, metaIndexOut);
metaOut.close();
metaIndexOut.close();
if (fileexists(p_metaFile.c_str())) std::remove(p_metaFile.c_str());
std::rename((p_metaFile + "_tmp").c_str(), p_metaFile.c_str());
return ErrorCode::Success;
}
ErrorCode
MemMetadataSet::SaveMetadataToMemory(void **pGraphMemFile, int64_t &len) {
auto size = sizeof(int64_t) + sizeof(int64_t) + m_metadataHolder.Length() + sizeof(std::uint64_t) * m_offsets.size();
char* mem = (char*)malloc(size);
if (mem == NULL) return ErrorCode::Fail;
auto ptr = mem;
*(int64_t*)ptr = m_metadataHolder.Length();
ptr += sizeof(int64_t);
m_count = static_cast<int>(m_offsets.size()) - 1;
*(int64_t*)ptr = m_count;
ptr += sizeof(int64_t);
memcpy(ptr, m_metadataHolder.Data(), m_metadataHolder.Length());
ptr += m_metadataHolder.Length();
memcpy(ptr, m_offsets.data(), sizeof(std::uint64_t)*m_offsets.size());
*pGraphMemFile = mem;
len = size;
return ErrorCode::Success;
}
ErrorCode
MemMetadataSet::LoadMetadataFromMemory(void *pGraphMemFile) {
m_metadataHolder.Clear();
m_offsetHolder.Clear();
m_offsets.clear();
char* ptr = (char *)pGraphMemFile;
auto metadataHolderLength = *(int64_t *)ptr;
ptr += sizeof(int64_t);
m_count = *(int64_t *)ptr;
ptr += sizeof(int64_t);
m_metadataHolder = ByteArray::Alloc(metadataHolderLength);
memcpy(m_metadataHolder.Data(), ptr, metadataHolderLength);
ptr += metadataHolderLength;
m_offsetHolder = ByteArray::Alloc(sizeof(std::uint64_t ) * (m_count + 1));
memcpy(m_offsetHolder.Data(), ptr, sizeof(std::uint64_t ) * (m_count + 1));
const std::uint64_t* newdata = reinterpret_cast<const std::uint64_t*>(m_offsetHolder.Data());
m_offsets.insert(m_offsets.end(), newdata, newdata + m_count + 1);
return ErrorCode::Success;
}

View File

@ -6,6 +6,7 @@
#include "inc/Helper/CommonHelper.h"
#include "inc/Helper/StringConvert.h"
#include "inc/Helper/SimpleIniReader.h"
#include "inc/Helper/BufferStream.h"
#include "inc/Core/BKT/Index.h"
#include "inc/Core/KDT/Index.h"
@ -46,7 +47,7 @@ VectorIndex::SetMetadata(const std::string& p_metadataFilePath, const std::strin
ByteArray
VectorIndex::GetMetadata(IndexType p_vectorID) const {
VectorIndex::GetMetadata(SizeType p_vectorID) const {
if (nullptr != m_pMetadata)
{
return m_pMetadata->GetMetadata(p_vectorID);
@ -55,6 +56,100 @@ VectorIndex::GetMetadata(IndexType p_vectorID) const {
}
// Returns the list of sizes reported by BufferSize(), with the two sizes
// reported by the metadata set's BufferSize() appended (first, then second)
// when metadata is attached.
std::shared_ptr<std::vector<std::uint64_t>> VectorIndex::CalculateBufferSize() const
{
    std::shared_ptr<std::vector<std::uint64_t>> ret = BufferSize();
    if (m_pMetadata != nullptr)
    {
        auto metasize = m_pMetadata->BufferSize();
        ret->push_back(metasize.first);
        ret->push_back(metasize.second);
    }
    // Return the local by name: it is moved implicitly and remains eligible
    // for copy elision, which an explicit std::move would prevent.
    return ret;
}
// Reads the index-independent part of the configuration and then hands the
// reader to LoadConfig().
//   - When a [MetaData] section exists, its MetaDataFilePath / MetaDataIndexPath
//     values are stored in m_sMetadataFile / m_sMetadataIndexFile.
//   - Returns ErrorCode::Fail when [Index] DistCalcMethod is missing or Undefined.
ErrorCode
VectorIndex::LoadIndexConfig(Helper::IniReader& p_reader)
{
    const std::string metaSectionName("MetaData");
    if (p_reader.DoesSectionExist(metaSectionName))
    {
        m_sMetadataFile = p_reader.GetParameter(metaSectionName, "MetaDataFilePath", std::string());
        m_sMetadataIndexFile = p_reader.GetParameter(metaSectionName, "MetaDataIndexPath", std::string());
    }

    const DistCalcMethod configuredMethod = p_reader.GetParameter("Index", "DistCalcMethod", DistCalcMethod::Undefined);
    if (configuredMethod != DistCalcMethod::Undefined)
    {
        return LoadConfig(p_reader);
    }

    std::cerr << "Error: Failed to load parameter DistCalcMethod." << std::endl;
    return ErrorCode::Fail;
}
// Writes the loader configuration to p_configOut:
//   - a [MetaData] section (file names, plus MetaDataToVectorIndex=true when a
//     metadata-to-vector map exists) when metadata is attached,
//   - an [Index] section with the algorithm and value type,
// and then lets SaveConfig() append the remaining parameters.
// Returns whatever SaveConfig() returns.
ErrorCode
VectorIndex::SaveIndexConfig(std::ostream& p_configOut)
{
    const bool hasMetadata = (m_pMetadata != nullptr);
    if (hasMetadata)
    {
        p_configOut << "[MetaData]" << std::endl;
        p_configOut << "MetaDataFilePath=" << m_sMetadataFile << std::endl;
        p_configOut << "MetaDataIndexPath=" << m_sMetadataIndexFile << std::endl;
        if (m_pMetaToVec != nullptr)
        {
            p_configOut << "MetaDataToVectorIndex=true" << std::endl;
        }
        p_configOut << std::endl;
    }

    p_configOut << "[Index]" << std::endl;

    p_configOut << "IndexAlgoType=";
    p_configOut << Helper::Convert::ConvertToString(GetIndexAlgoType()) << std::endl;

    p_configOut << "ValueType=";
    p_configOut << Helper::Convert::ConvertToString(GetVectorValueType()) << std::endl;

    p_configOut << std::endl;

    return SaveConfig(p_configOut);
}
void
VectorIndex::BuildMetaMapping()
{
m_pMetaToVec.reset(new std::unordered_map<std::string, SizeType>);
for (SizeType i = 0; i < m_pMetadata->Count(); i++) {
ByteArray meta = m_pMetadata->GetMetadata(i);
m_pMetaToVec->emplace(std::string((char*)meta.Data(), meta.Length()), i);
}
}
// Loads an index from an in-memory configuration string and a list of blobs.
//   p_config     - INI text as produced by SaveIndex(std::string&, ...).
//   p_indexBlobs - index data blobs; when the configuration has a [MetaData]
//                  section and more than four blobs are supplied, the last two
//                  are used as metadata and metadata index (a SizeType count
//                  followed by the offset table).
// Returns ErrorCode::FailedParseValue when the configuration cannot be parsed,
// the error reported by LoadIndexConfig() when the configuration is rejected,
// ErrorCode::Fail when the metadata blobs are unusable, and otherwise the
// result of LoadIndexDataFromMemory().
ErrorCode
VectorIndex::LoadIndex(const std::string& p_config, const std::vector<ByteArray>& p_indexBlobs)
{
    SPTAG::Helper::IniReader p_reader;
    std::istringstream p_configin(p_config);
    if (SPTAG::ErrorCode::Success != p_reader.LoadIni(p_configin)) return ErrorCode::FailedParseValue;

    // Do not continue with a configuration that LoadIndexConfig rejected
    // (e.g. missing DistCalcMethod).
    ErrorCode configRet = LoadIndexConfig(p_reader);
    if (ErrorCode::Success != configRet) return configRet;

    if (p_reader.DoesSectionExist("MetaData") && p_indexBlobs.size() > 4)
    {
        ByteArray pMetaIndex = p_indexBlobs[p_indexBlobs.size() - 1];
        // The blob must at least hold the leading SizeType count; otherwise the
        // read below is out of bounds and the length subtraction wraps around.
        if (pMetaIndex.Length() < sizeof(SizeType))
        {
            std::cerr << "Error: Failed to load metadata." << std::endl;
            return ErrorCode::Fail;
        }

        m_pMetadata.reset(new MemMetadataSet(p_indexBlobs[p_indexBlobs.size() - 2],
            ByteArray(pMetaIndex.Data() + sizeof(SizeType), pMetaIndex.Length() - sizeof(SizeType), false),
            *((SizeType*)pMetaIndex.Data())));

        if (!m_pMetadata->Available())
        {
            std::cerr << "Error: Failed to load metadata." << std::endl;
            return ErrorCode::Fail;
        }

        if (p_reader.GetParameter("MetaData", "MetaDataToVectorIndex", std::string()) == "true")
        {
            BuildMetaMapping();
        }
    }
    return LoadIndexDataFromMemory(p_indexBlobs);
}
ErrorCode
VectorIndex::LoadIndex(const std::string& p_folderPath)
{
@ -65,40 +160,64 @@ VectorIndex::LoadIndex(const std::string& p_folderPath)
}
Helper::IniReader p_configReader;
if (ErrorCode::Success != p_configReader.LoadIniFile(folderPath + "/indexloader.ini"))
if (ErrorCode::Success != p_configReader.LoadIniFile(folderPath + "/indexloader.ini")) return ErrorCode::FailedOpenFile;
LoadIndexConfig(p_configReader);
if (p_configReader.DoesSectionExist("MetaData"))
{
return ErrorCode::FailedOpenFile;
}
std::string metadataSection("MetaData");
if (p_configReader.DoesSectionExist(metadataSection))
{
std::string metadataFilePath = p_configReader.GetParameter(metadataSection,
"MetaDataFilePath",
std::string());
std::string metadataIndexFilePath = p_configReader.GetParameter(metadataSection,
"MetaDataIndexPath",
std::string());
m_pMetadata.reset(new FileMetadataSet(folderPath + metadataFilePath, folderPath + metadataIndexFilePath));
m_pMetadata.reset(new FileMetadataSet(folderPath + m_sMetadataFile, folderPath + m_sMetadataIndexFile));
if (!m_pMetadata->Available())
{
std::cerr << "Error: Failed to load metadata." << std::endl;
return ErrorCode::Fail;
}
}
if (DistCalcMethod::Undefined == p_configReader.GetParameter("Index", "DistCalcMethod", DistCalcMethod::Undefined))
{
std::cerr << "Error: Failed to load parameter DistCalcMethod." << std::endl;
return ErrorCode::Fail;
}
return LoadIndex(folderPath, p_configReader);
if (p_configReader.GetParameter("MetaData", "MetaDataToVectorIndex", std::string()) == "true")
{
BuildMetaMapping();
}
}
return LoadIndexData(folderPath);
}
ErrorCode VectorIndex::SaveIndex(const std::string& p_folderPath)
// Serializes the index into caller-provided memory.
//   p_config     - receives the configuration text written by SaveIndexConfig().
//   p_indexBlobs - one pre-allocated buffer per output; each is wrapped in an
//                  output stream. When metadata is attached and more than five
//                  buffers are given, the last two receive metadata and
//                  metadata index.
// When NeedRefine() is true the work is delegated to RefineIndex(); otherwise
// metadata (if any) is saved first and then SaveIndexData() is called.
// Returns the first non-success code encountered, else ErrorCode::Success.
ErrorCode
VectorIndex::SaveIndex(std::string& p_config, const std::vector<ByteArray>& p_indexBlobs)
{
    std::ostringstream configText;
    SaveIndexConfig(configText);
    p_config = configText.str();

    // One heap-allocated stream per blob; all of them are deleted before returning.
    std::vector<std::ostream*> outputs;
    for (const ByteArray& blob : p_indexBlobs)
    {
        outputs.push_back(new Helper::obufferstream(new Helper::streambuf((char*)blob.Data(), blob.Length()), true));
    }

    ErrorCode status = ErrorCode::Success;
    if (!NeedRefine())
    {
        const size_t total = outputs.size();
        if (m_pMetadata != nullptr && total > 5)
        {
            status = m_pMetadata->SaveMetadata(*outputs[total - 2], *outputs[total - 1]);
        }
        if (ErrorCode::Success == status)
        {
            status = SaveIndexData(outputs);
        }
    }
    else
    {
        status = RefineIndex(outputs);
    }

    for (std::ostream* stream : outputs)
    {
        delete stream;
    }
    return status;
}
ErrorCode
VectorIndex::SaveIndex(const std::string& p_folderPath)
{
std::string folderPath(p_folderPath);
if (!folderPath.empty() && *(folderPath.rbegin()) != FolderSep)
@ -111,39 +230,24 @@ ErrorCode VectorIndex::SaveIndex(const std::string& p_folderPath)
mkdir(folderPath.c_str());
}
std::string loaderFilePath = folderPath + "indexloader.ini";
std::ofstream configFile(folderPath + "indexloader.ini");
if (!configFile.is_open()) return ErrorCode::FailedCreateFile;
SaveIndexConfig(configFile);
configFile.close();
if (NeedRefine()) return RefineIndex(p_folderPath);
std::ofstream loaderFile(loaderFilePath);
if (!loaderFile.is_open())
if (m_pMetadata != nullptr)
{
return ErrorCode::FailedCreateFile;
ErrorCode ret = m_pMetadata->SaveMetadata(folderPath + m_sMetadataFile, folderPath + m_sMetadataIndexFile);
if (ErrorCode::Success != ret) return ret;
}
if (nullptr != m_pMetadata)
{
std::string metadataFile = "metadata.bin";
std::string metadataIndexFile = "metadataIndex.bin";
loaderFile << "[MetaData]" << std::endl;
loaderFile << "MetaDataFilePath=" << metadataFile << std::endl;
loaderFile << "MetaDataIndexPath=" << metadataIndexFile << std::endl;
loaderFile << std::endl;
m_pMetadata->SaveMetadata(folderPath + metadataFile, folderPath + metadataIndexFile);
}
loaderFile << "[Index]" << std::endl;
loaderFile << "IndexAlgoType=" << Helper::Convert::ConvertToString(GetIndexAlgoType()) << std::endl;
loaderFile << "ValueType=" << Helper::Convert::ConvertToString(GetVectorValueType()) << std::endl;
loaderFile << std::endl;
ErrorCode ret = SaveIndex(folderPath, loaderFile);
loaderFile.close();
return ret;
return SaveIndexData(folderPath);
}
ErrorCode
VectorIndex::BuildIndex(std::shared_ptr<VectorSet> p_vectorSet,
std::shared_ptr<MetadataSet> p_metadataSet)
std::shared_ptr<MetadataSet> p_metadataSet, bool p_withMetaIndex)
{
if (nullptr == p_vectorSet || p_vectorSet->Count() == 0 || p_vectorSet->Dimension() == 0 || p_vectorSet->GetValueType() != GetVectorValueType())
{
@ -152,13 +256,17 @@ VectorIndex::BuildIndex(std::shared_ptr<VectorSet> p_vectorSet,
BuildIndex(p_vectorSet->GetData(), p_vectorSet->Count(), p_vectorSet->Dimension());
m_pMetadata = std::move(p_metadataSet);
if (p_withMetaIndex && m_pMetadata != nullptr)
{
BuildMetaMapping();
}
return ErrorCode::Success;
}
ErrorCode
VectorIndex::SearchIndex(const void* p_vector, int p_neighborCount, std::vector<BasicResult>& p_results) const {
QueryResult res(p_vector, p_neighborCount, p_results);
VectorIndex::SearchIndex(const void* p_vector, int p_neighborCount, bool p_withMeta, BasicResult* p_results) const {
QueryResult res(p_vector, p_neighborCount, p_withMeta, p_results);
SearchIndex(res);
return ErrorCode::Success;
}
@ -170,17 +278,54 @@ VectorIndex::AddIndex(std::shared_ptr<VectorSet> p_vectorSet, std::shared_ptr<Me
{
return ErrorCode::Fail;
}
AddIndex(p_vectorSet->GetData(), p_vectorSet->Count(), p_vectorSet->Dimension());
SizeType currStart;
ErrorCode ret = AddIndex(p_vectorSet->GetData(), p_vectorSet->Count(), p_vectorSet->Dimension(), &currStart);
if (ret != ErrorCode::Success) return ret;
if (m_pMetadata == nullptr) {
m_pMetadata = std::move(p_metadataSet);
if (currStart == 0)
m_pMetadata = std::move(p_metadataSet);
else
return ErrorCode::Success;
}
else {
m_pMetadata->AddBatch(*p_metadataSet);
}
if (m_pMetaToVec != nullptr) {
for (SizeType i = 0; i < p_vectorSet->Count(); i++) {
ByteArray meta = m_pMetadata->GetMetadata(currStart + i);
DeleteIndex(meta);
m_pMetaToVec->emplace(std::string((char*)meta.Data(), meta.Length()), currStart + i);
}
}
return ErrorCode::Success;
}
// Deletes the vector registered under the given metadata bytes.
// Returns ErrorCode::Fail when no metadata-to-vector map exists; otherwise
// ErrorCode::Success, including when the metadata is not found. The result of
// the id-based DeleteIndex() call is not propagated.
ErrorCode
VectorIndex::DeleteIndex(ByteArray p_meta) {
    if (m_pMetaToVec != nullptr) {
        const std::string key((char*)p_meta.Data(), p_meta.Length());
        auto found = m_pMetaToVec->find(key);
        if (found != m_pMetaToVec->end()) {
            DeleteIndex(found->second);
        }
        return ErrorCode::Success;
    }
    return ErrorCode::Fail;
}
// Looks up the vector registered under the given metadata bytes.
// Returns nullptr when no metadata-to-vector map exists or the metadata is
// not in it; otherwise the result of the id-based GetSample().
const void* VectorIndex::GetSample(ByteArray p_meta)
{
    if (m_pMetaToVec != nullptr)
    {
        const std::string key((char*)p_meta.Data(), p_meta.Length());
        auto found = m_pMetaToVec->find(key);
        if (found != m_pMetaToVec->end())
        {
            return GetSample(found->second);
        }
    }
    return nullptr;
}
std::shared_ptr<VectorIndex>
VectorIndex::CreateInstance(IndexAlgoType p_algo, VectorValueType p_valuetype)
{
@ -223,100 +368,61 @@ ErrorCode
VectorIndex::LoadIndex(const std::string& p_loaderFilePath, std::shared_ptr<VectorIndex>& p_vectorIndex)
{
Helper::IniReader iniReader;
if (ErrorCode::Success != iniReader.LoadIniFile(p_loaderFilePath + "/indexloader.ini"))
{
return ErrorCode::FailedOpenFile;
}
if (ErrorCode::Success != iniReader.LoadIniFile(p_loaderFilePath + "/indexloader.ini")) return ErrorCode::FailedOpenFile;
IndexAlgoType algoType = iniReader.GetParameter("Index", "IndexAlgoType", IndexAlgoType::Undefined);
VectorValueType valueType = iniReader.GetParameter("Index", "ValueType", VectorValueType::Undefined);
if (IndexAlgoType::Undefined == algoType || VectorValueType::Undefined == valueType)
{
return ErrorCode::Fail;
}
if (algoType == IndexAlgoType::BKT) {
switch (valueType)
{
#define DefineVectorValueType(Name, Type) \
case VectorValueType::Name: \
p_vectorIndex.reset(new BKT::Index<Type>); \
p_vectorIndex->LoadIndex(p_loaderFilePath); \
break; \
p_vectorIndex = CreateInstance(algoType, valueType);
if (p_vectorIndex == nullptr) return ErrorCode::FailedParseValue;
#include "inc/Core/DefinitionList.h"
#undef DefineVectorValueType
default: break;
}
}
else if (algoType == IndexAlgoType::KDT) {
switch (valueType)
{
#define DefineVectorValueType(Name, Type) \
case VectorValueType::Name: \
p_vectorIndex.reset(new KDT::Index<Type>); \
p_vectorIndex->LoadIndex(p_loaderFilePath); \
break; \
#include "inc/Core/DefinitionList.h"
#undef DefineVectorValueType
default: break;
}
}
return ErrorCode::Success;
return p_vectorIndex->LoadIndex(p_loaderFilePath);
}
ErrorCode VectorIndex::MergeIndex(const char* p_indexFilePath1, const char* p_indexFilePath2)
// Creates an index from an in-memory INI configuration and a list of index
// blobs.
//
// p_config      - INI text; the "Index" section supplies IndexAlgoType and
//                 ValueType, which select the concrete index to instantiate.
// p_indexBlobs  - blobs forwarded unchanged to the instance-level LoadIndex.
// p_vectorIndex - receives the created index (null if creation failed).
//
// Returns ErrorCode::FailedParseValue when the configuration cannot be parsed
// or CreateInstance yields no index; otherwise the result of the instance's
// own LoadIndex(p_config, p_indexBlobs).
ErrorCode
VectorIndex::LoadIndex(const std::string& p_config, const std::vector<ByteArray>& p_indexBlobs, std::shared_ptr<VectorIndex>& p_vectorIndex)
{
    SPTAG::Helper::IniReader configReader;
    std::istringstream configStream(p_config);
    if (configReader.LoadIni(configStream) != SPTAG::ErrorCode::Success)
    {
        return ErrorCode::FailedParseValue;
    }

    IndexAlgoType algo = configReader.GetParameter("Index", "IndexAlgoType", IndexAlgoType::Undefined);
    VectorValueType valueKind = configReader.GetParameter("Index", "ValueType", VectorValueType::Undefined);

    p_vectorIndex = CreateInstance(algo, valueKind);
    if (p_vectorIndex == nullptr)
    {
        return ErrorCode::FailedParseValue;
    }

    return p_vectorIndex->LoadIndex(p_config, p_indexBlobs);
}
ErrorCode
VectorIndex::MergeIndex(const char* p_indexFilePath1, const char* p_indexFilePath2)
{
std::string folderPath1(p_indexFilePath1), folderPath2(p_indexFilePath2);
if (!folderPath1.empty() && *(folderPath1.rbegin()) != FolderSep) folderPath1 += FolderSep;
if (!folderPath2.empty() && *(folderPath2.rbegin()) != FolderSep) folderPath2 += FolderSep;
Helper::IniReader p_configReader1, p_configReader2;
if (ErrorCode::Success != p_configReader1.LoadIniFile(folderPath1 + "/indexloader.ini"))
return ErrorCode::FailedOpenFile;
std::shared_ptr<VectorIndex> index1, index2;
LoadIndex(folderPath1, index1);
LoadIndex(folderPath2, index2);
if (ErrorCode::Success != p_configReader2.LoadIniFile(folderPath2 + "/indexloader.ini"))
return ErrorCode::FailedOpenFile;
std::shared_ptr<VectorSet> p_vectorSet;
std::shared_ptr<MetadataSet> p_metaSet;
size_t vectorSize = GetValueTypeSize(index2->GetVectorValueType()) * index2->GetFeatureDim();
std::uint64_t offsets[2] = { 0 };
ByteArray metaoffset((std::uint8_t*)offsets, 2 * sizeof(std::uint64_t), false);
for (SizeType i = 0; i < index2->GetNumSamples(); i++)
if (index2->ContainSample(i))
{
p_vectorSet.reset(new BasicVectorSet(ByteArray((std::uint8_t*)index2->GetSample(i), vectorSize, false),
index2->GetVectorValueType(), index2->GetFeatureDim(), 1));
ByteArray meta = index2->GetMetadata(i);
offsets[1] = meta.Length();
p_metaSet.reset(new MemMetadataSet(meta, metaoffset, 1));
index1->AddIndex(p_vectorSet, p_metaSet);
}
std::shared_ptr<VectorIndex> index = CreateInstance(
p_configReader1.GetParameter("Index", "IndexAlgoType", IndexAlgoType::Undefined),
p_configReader1.GetParameter("Index", "ValueType", VectorValueType::Undefined));
if (index == nullptr) return ErrorCode::FailedParseValue;
std::string empty("");
if (!COMMON::DataUtils::MergeIndex(folderPath1 + p_configReader1.GetParameter("Index", "VectorFilePath", empty),
folderPath1 + p_configReader1.GetParameter("MetaData", "MetaDataFilePath", empty),
folderPath1 + p_configReader1.GetParameter("MetaData", "MetaDataIndexPath", empty),
folderPath2 + p_configReader1.GetParameter("Index", "VectorFilePath", empty),
folderPath2 + p_configReader1.GetParameter("MetaData", "MetaDataFilePath", empty),
folderPath2 + p_configReader1.GetParameter("MetaData", "MetaDataIndexPath", empty)))
return ErrorCode::Fail;
for (const auto& iter : p_configReader1.GetParameters("Index"))
index->SetParameter(iter.first.c_str(), iter.second.c_str());
if (p_configReader1.DoesSectionExist("MetaData"))
{
for (const auto& iter : p_configReader1.GetParameters("MetaData"))
index->SetParameter(iter.first.c_str(), iter.second.c_str());
index->SetMetadata(folderPath1 + p_configReader1.GetParameter("MetaData", "MetaDataFilePath", empty),
folderPath1 + p_configReader1.GetParameter("MetaData", "MetaDataIndexPath", empty));
}
std::ifstream vecIn(folderPath1 + p_configReader1.GetParameter("Index", "VectorFilePath", empty), std::ios::binary);
int R, C;
vecIn.read((char*)&R, sizeof(int));
vecIn.read((char*)&C, sizeof(int));
size_t size = R * C * GetValueTypeSize(index->GetVectorValueType());
char* data = new char[size];
vecIn.read(data, size);
vecIn.close();
index->BuildIndex((void*)data, R, C);
index->SaveIndex(folderPath1);
index1->SaveIndex(folderPath1);
return ErrorCode::Success;
}
}

View File

@ -19,7 +19,7 @@ VectorSet::~VectorSet()
BasicVectorSet::BasicVectorSet(const ByteArray& p_bytesArray,
VectorValueType p_valueType,
SizeType p_dimension,
DimensionType p_dimension,
SizeType p_vectorCount)
: m_data(p_bytesArray),
m_valueType(p_valueType),
@ -43,15 +43,14 @@ BasicVectorSet::GetValueType() const
void*
BasicVectorSet::GetVector(IndexType p_vectorID) const
BasicVectorSet::GetVector(SizeType p_vectorID) const
{
if (p_vectorID < 0 || static_cast<SizeType>(p_vectorID) >= m_vectorCount)
if (p_vectorID < 0 || p_vectorID >= m_vectorCount)
{
return nullptr;
}
SizeType offset = static_cast<SizeType>(p_vectorID) * m_perVectorDataSize;
return reinterpret_cast<void*>(m_data.Data() + offset);
return reinterpret_cast<void*>(m_data.Data() + ((size_t)p_vectorID) * m_perVectorDataSize);
}
@ -61,7 +60,7 @@ BasicVectorSet::GetData() const
return reinterpret_cast<void*>(m_data.Data());
}
SizeType
DimensionType
BasicVectorSet::Dimension() const
{
return m_dimension;
@ -88,8 +87,8 @@ BasicVectorSet::Save(const std::string& p_vectorFile) const
FILE * fp = fopen(p_vectorFile.c_str(), "wb");
if (fp == NULL) return ErrorCode::FailedOpenFile;
fwrite(&m_vectorCount, sizeof(int), 1, fp);
fwrite(&m_dimension, sizeof(int), 1, fp);
fwrite(&m_vectorCount, sizeof(SizeType), 1, fp);
fwrite(&m_dimension, sizeof(DimensionType), 1, fp);
fwrite((const void*)(m_data.Data()), m_data.Length(), 1, fp);
fclose(fp);

View File

@ -25,15 +25,8 @@ IniReader::~IniReader()
}
ErrorCode
IniReader::LoadIniFile(const std::string& p_iniFilePath)
ErrorCode IniReader::LoadIni(std::istream& p_input)
{
std::ifstream input(p_iniFilePath);
if (!input.is_open())
{
return ErrorCode::FailedOpenFile;
}
const std::size_t c_bufferSize = 1 << 16;
std::unique_ptr<char[]> line(new char[c_bufferSize]);
@ -51,9 +44,9 @@ IniReader::LoadIniFile(const std::string& p_iniFilePath)
return std::isspace(p_ch) != 0;
};
while (!input.eof())
while (!p_input.eof())
{
if (!input.getline(line.get(), c_bufferSize))
if (!p_input.getline(line.get(), c_bufferSize))
{
break;
}
@ -141,11 +134,21 @@ IniReader::LoadIniFile(const std::string& p_iniFilePath)
}
}
}
return ErrorCode::Success;
}
// Opens the INI file at p_iniFilePath and parses it through LoadIni.
//
// Returns ErrorCode::FailedOpenFile when the file cannot be opened; otherwise
// whatever LoadIni reports for the stream contents.
ErrorCode
IniReader::LoadIniFile(const std::string& p_iniFilePath)
{
    std::ifstream iniStream(p_iniFilePath);
    if (!iniStream.is_open())
    {
        return ErrorCode::FailedOpenFile;
    }

    const ErrorCode status = LoadIni(iniStream);
    iniStream.close();
    return status;
}
bool
IniReader::DoesSectionExist(const std::string& p_section) const
{

View File

@ -0,0 +1,44 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
#include "inc/Helper/VectorSetReader.h"
#include "inc/Helper/VectorSetReaders/DefaultReader.h"
using namespace SPTAG;
using namespace SPTAG::Helper;
// Initializes the reader settings from the supplied values and then registers
// one option per setting (short flag, long flag, description), each bound to
// the corresponding member.
//
// p_valueType       - initial value of m_inputValueType.
// p_dimension       - initial value of m_dimension.
// p_vectorDelimiter - initial value of m_vectorDelimiter.
// p_threadNum       - initial value of m_threadNum.
//
// AddOptionalOption / AddRequiredOption are defined elsewhere; presumably they
// hook the members up to command-line parsing -- confirm against the option
// base class.
ReaderOptions::ReaderOptions(VectorValueType p_valueType, DimensionType p_dimension, std::string p_vectorDelimiter, std::uint32_t p_threadNum)
: m_threadNum(p_threadNum), m_dimension(p_dimension), m_vectorDelimiter(p_vectorDelimiter), m_inputValueType(p_valueType)
{
// Thread count and delimiter are registered as optional.
AddOptionalOption(m_threadNum, "-t", "--thread", "Thread Number.");
AddOptionalOption(m_vectorDelimiter, "", "--delimiter", "Vector delimiter.");
// Dimension and value type are registered as required.
AddRequiredOption(m_dimension, "-d", "--dimension", "Dimension of vector.");
// NOTE(review): the help text mentions a float default, but the option is
// registered as required and its initial value is whatever p_valueType is.
AddRequiredOption(m_inputValueType, "-v", "--vectortype", "Input vector data type. Default is float.");
}
// Destructor with an empty body; no explicit cleanup is performed here.
ReaderOptions::~ReaderOptions()
{
}
// Stores the shared reader options for use by concrete readers.
//
// p_options is taken by value, so it is moved into the member rather than
// copied; this avoids an extra shared_ptr reference-count round trip and
// matches how CreateInstance and DefaultReader hand the pointer on.
VectorSetReader::VectorSetReader(std::shared_ptr<ReaderOptions> p_options)
    : m_options(std::move(p_options))
{
}
// Destructor with an empty body; no explicit cleanup is performed here.
VectorSetReader:: ~VectorSetReader()
{
}
// Factory for vector-set readers: always constructs a DefaultReader that takes
// ownership of the given options, returned through the base-class pointer.
std::shared_ptr<VectorSetReader>
VectorSetReader::CreateInstance(std::shared_ptr<ReaderOptions> p_options)
{
    std::shared_ptr<VectorSetReader> reader(new DefaultReader(std::move(p_options)));
    return reader;
}

View File

@ -1,17 +1,17 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
#include "inc/IndexBuilder/VectorSetReaders/DefaultReader.h"
#include "inc/Helper/VectorSetReaders/DefaultReader.h"
#include "inc/Helper/StringConvert.h"
#include "inc/Helper/CommonHelper.h"
#include "inc/IndexBuilder/ThreadPool.h"
#include <fstream>
#include <sstream>
#include <iostream>
#include <omp.h>
using namespace SPTAG;
using namespace SPTAG::IndexBuilder;
using namespace SPTAG::Helper;
namespace
{
@ -139,10 +139,13 @@ private:
} // namespace Local
} // namespace
DefaultReader::DefaultReader(std::shared_ptr<BuilderOptions> p_options)
DefaultReader::DefaultReader(std::shared_ptr<ReaderOptions> p_options)
: VectorSetReader(std::move(p_options)),
m_subTaskBlocksize(0)
m_subTaskBlocksize(0)
{
omp_set_num_threads(m_options->m_threadNum);
std::string tempFolder("tempfolder");
if (!direxists(tempFolder.c_str()))
{
@ -180,7 +183,7 @@ DefaultReader::LoadFile(const std::string& p_filePaths)
{
const auto& files = GetFileSizes(p_filePaths);
std::vector<std::function<void()>> subWorks;
subWorks.reserve(files.size() * ThreadPool::CurrentThreadNum());
subWorks.reserve(files.size() * m_options->m_threadNum);
m_subTaskCount = 0;
for (const auto& fileInfo : files)
@ -197,7 +200,7 @@ DefaultReader::LoadFile(const std::string& p_filePaths)
std::size_t blockSize = m_subTaskBlocksize;
if (0 == blockSize)
{
fileTaskCount = ThreadPool::CurrentThreadNum();
fileTaskCount = m_options->m_threadNum;
blockSize = (fileInfo.second + fileTaskCount - 1) / fileTaskCount;
}
else
@ -223,9 +226,10 @@ DefaultReader::LoadFile(const std::string& p_filePaths)
m_waitSignal.Reset(m_subTaskCount);
for (auto& workItem : subWorks)
#pragma omp parallel for schedule(dynamic)
for (int64_t i = 0; i < (int64_t)subWorks.size(); i++)
{
ThreadPool::Queue(std::move(workItem));
subWorks[i]();
}
m_waitSignal.Wait();
@ -244,7 +248,7 @@ DefaultReader::GetVectorSet() const
std::ifstream inputStream;
inputStream.open(m_vectorOutput, std::ifstream::binary);
inputStream.seekg(sizeof(uint32_t) + sizeof(uint32_t), std::ifstream::beg);
inputStream.seekg(sizeof(SizeType) + sizeof(DimensionType), std::ifstream::beg);
inputStream.read(vecBuf, m_totalRecordVectorBytes);
inputStream.close();
@ -276,7 +280,7 @@ DefaultReader::LoadFileInternal(const std::string& p_filePath,
std::ofstream metaStreamContent;
std::ofstream metaStreamIndex;
std::uint32_t recordCount = 0;
SizeType recordCount = 0;
std::uint64_t metaOffset = 0;
std::size_t totalRead = 0;
std::streamoff startpos = p_fileBlockID * p_fileBlockSize;
@ -400,12 +404,12 @@ DefaultReader::MergeData()
std::unique_ptr<char[]> bufferHolder(new char[bufferSize]);
char* buf = bufferHolder.get();
std::uint32_t uint32Var = m_totalRecordCount;
SizeType totalRecordCount = m_totalRecordCount;
outputStream.open(m_vectorOutput, std::ofstream::binary);
outputStream.write(reinterpret_cast<char*>(&uint32Var), sizeof(uint32Var));
outputStream.write(reinterpret_cast<char*>(&(m_options->m_dimension)), sizeof(m_options->m_dimension));
outputStream.write(reinterpret_cast<char*>(&totalRecordCount), sizeof(totalRecordCount));
outputStream.write(reinterpret_cast<char*>(&(m_options->m_dimension)), sizeof(m_options->m_dimension));
for (std::uint32_t i = 0; i < m_subTaskCount; ++i)
{
@ -442,7 +446,7 @@ DefaultReader::MergeData()
outputStream.open(m_metadataIndexOutput, std::ofstream::binary);
outputStream.write(reinterpret_cast<char*>(&uint32Var), sizeof(uint32Var));
outputStream.write(reinterpret_cast<char*>(&totalRecordCount), sizeof(totalRecordCount));
std::uint64_t totalOffset = 0;
for (std::uint32_t i = 0; i < m_subTaskCount; ++i)
@ -453,18 +457,18 @@ DefaultReader::MergeData()
file += ".tmp";
inputStream.open(file, std::ifstream::binary);
for (std::uint32_t remains = m_subTaskRecordCount[i]; remains > 0;)
for (SizeType remains = m_subTaskRecordCount[i]; remains > 0;)
{
std::size_t readBytesCount = min(remains * sizeof(std::uint64_t), bufferSizeTrim64);
inputStream.read(buf, readBytesCount);
std::uint64_t* offset = reinterpret_cast<std::uint64_t*>(buf);
for (std::uint32_t i = 0; i < readBytesCount / sizeof(std::uint64_t); ++i)
for (std::uint64_t i = 0; i < readBytesCount / sizeof(std::uint64_t); ++i)
{
offset[i] += totalOffset;
}
outputStream.write(buf, readBytesCount);
remains -= static_cast<std::uint32_t>(readBytesCount / sizeof(std::uint64_t));
remains -= static_cast<SizeType>(readBytesCount / sizeof(std::uint64_t));
}
inputStream.read(buf, sizeof(std::uint64_t));

View File

@ -11,14 +11,8 @@ using namespace SPTAG::IndexBuilder;
BuilderOptions::BuilderOptions()
: m_threadNum(32),
m_inputValueType(VectorValueType::Float),
m_vectorDelimiter("|")
: Helper::ReaderOptions(VectorValueType::Float, 0, "|", 32)
{
AddOptionalOption(m_threadNum, "-t", "--thread", "Thread Number.");
AddOptionalOption(m_vectorDelimiter, "", "--delimiter", "Vector delimiter.");
AddRequiredOption(m_dimension, "-d", "--dimension", "Dimension of vector.");
AddRequiredOption(m_inputValueType, "-v", "--vectortype", "Input vector data type. Default is float.");
AddRequiredOption(m_inputFiles, "-i", "--input", "Input raw data.");
AddRequiredOption(m_outputFolder, "-o", "--outputfolder", "Output folder.");
AddRequiredOption(m_indexAlgoType, "-a", "--algo", "Index Algorithm type.");

View File

@ -1,27 +0,0 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
#include "inc/IndexBuilder/VectorSetReader.h"
#include "inc/IndexBuilder/VectorSetReaders/DefaultReader.h"
using namespace SPTAG;
using namespace SPTAG::IndexBuilder;
VectorSetReader::VectorSetReader(std::shared_ptr<BuilderOptions> p_options)
: m_options(p_options)
{
}
VectorSetReader:: ~VectorSetReader()
{
}
std::shared_ptr<VectorSetReader>
VectorSetReader::CreateInstance(std::shared_ptr<BuilderOptions> p_options)
{
return std::shared_ptr<VectorSetReader>(new DefaultReader(std::move(p_options)));
}

View File

@ -1,9 +1,8 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
#include "inc/IndexBuilder/ThreadPool.h"
#include "inc/IndexBuilder/Options.h"
#include "inc/IndexBuilder/VectorSetReader.h"
#include "inc/Helper/VectorSetReader.h"
#include "inc/Core/VectorIndex.h"
#include "inc/Core/Common.h"
#include "inc/Helper/SimpleIniReader.h"
@ -20,7 +19,7 @@ int main(int argc, char* argv[])
{
exit(1);
}
IndexBuilder::ThreadPool::Init(options->m_threadNum);
auto indexBuilder = VectorIndex::CreateInstance(options->m_indexAlgoType, options->m_inputValueType);
Helper::IniReader iniReader;
@ -32,14 +31,14 @@ int main(int argc, char* argv[])
for (int i = 1; i < argc; i++)
{
std::string param(argv[i]);
int idx = (int)param.find("=");
if (idx < 0) continue;
size_t idx = param.find("=");
if (idx == std::string::npos) continue;
std::string paramName = param.substr(0, idx);
std::string paramVal = param.substr(idx + 1);
std::string sectionName;
idx = (int)paramName.find(".");
if (idx >= 0) {
idx = paramName.find(".");
if (idx != std::string::npos) {
sectionName = paramName.substr(0, idx);
paramName = paramName.substr(idx + 1);
}
@ -63,9 +62,10 @@ int main(int argc, char* argv[])
fprintf(stderr, "Failed to read input file.\n");
exit(1);
}
int row, col;
inputStream.read((char*)&row, sizeof(int));
inputStream.read((char*)&col, sizeof(int));
SizeType row;
DimensionType col;
inputStream.read((char*)&row, sizeof(SizeType));
inputStream.read((char*)&col, sizeof(DimensionType));
std::uint64_t totalRecordVectorBytes = ((std::uint64_t)GetValueTypeSize(options->m_inputValueType)) * row * col;
ByteArray vectorSet = ByteArray::Alloc(totalRecordVectorBytes);
char* vecBuf = reinterpret_cast<char*>(vectorSet.Data());
@ -81,7 +81,7 @@ int main(int argc, char* argv[])
indexBuilder->SaveIndex(options->m_outputFolder);
}
else {
auto vectorReader = IndexBuilder::VectorSetReader::CreateInstance(options);
auto vectorReader = Helper::VectorSetReader::CreateInstance(options);
if (ErrorCode::Success != vectorReader->LoadFile(options->m_inputFiles))
{
fprintf(stderr, "Failed to read input file.\n");

View File

@ -15,13 +15,13 @@
using namespace SPTAG;
template <typename T>
float CalcRecall(std::vector<QueryResult> &results, const std::vector<std::set<int>> &truth, int NumQuerys, int K, std::ofstream& log)
float CalcRecall(std::vector<QueryResult> &results, const std::vector<std::set<SizeType>> &truth, SizeType NumQuerys, int K, std::ofstream& log)
{
float meanrecall = 0, minrecall = MaxDist, maxrecall = 0, stdrecall = 0;
std::vector<float> thisrecall(NumQuerys, 0);
for (int i = 0; i < NumQuerys; i++)
for (SizeType i = 0; i < NumQuerys; i++)
{
for (int id : truth[i])
for (SizeType id : truth[i])
{
for (int j = 0; j < K; j++)
{
@ -38,7 +38,7 @@ float CalcRecall(std::vector<QueryResult> &results, const std::vector<std::set<i
if (thisrecall[i] > maxrecall) maxrecall = thisrecall[i];
}
meanrecall /= NumQuerys;
for (int i = 0; i < NumQuerys; i++)
for (SizeType i = 0; i < NumQuerys; i++)
{
stdrecall += (thisrecall[i] - meanrecall) * (thisrecall[i] - meanrecall);
}
@ -47,11 +47,11 @@ float CalcRecall(std::vector<QueryResult> &results, const std::vector<std::set<i
return meanrecall;
}
void LoadTruth(std::ifstream& fp, std::vector<std::set<int>>& truth, int NumQuerys, int K)
void LoadTruth(std::ifstream& fp, std::vector<std::set<SizeType>>& truth, SizeType NumQuerys, int K)
{
int get;
SizeType get;
std::string line;
for (int i = 0; i < NumQuerys; ++i)
for (SizeType i = 0; i < NumQuerys; ++i)
{
truth[i].clear();
for (int j = 0; j < K; ++j)
@ -70,8 +70,8 @@ int Process(Helper::IniReader& reader, VectorIndex& index)
std::string truthFile = reader.GetParameter("Index", "TruthFile", std::string("truth.txt"));
std::string outputFile = reader.GetParameter("Index", "ResultFile", std::string(""));
int numBatchQuerys = reader.GetParameter("Index", "NumBatchQuerys", 10000);
int numDebugQuerys = reader.GetParameter("Index", "NumDebugQuerys", -1);
SizeType numBatchQuerys = reader.GetParameter("Index", "NumBatchQuerys", (SizeType)10000);
SizeType numDebugQuerys = reader.GetParameter("Index", "NumDebugQuerys", (SizeType)-1);
int K = reader.GetParameter("Index", "K", 32);
std::vector<std::string> maxCheck = Helper::StrUtils::SplitString(reader.GetParameter("Index", "MaxCheck", std::string("2048")), "#");
@ -100,13 +100,13 @@ int Process(Helper::IniReader& reader, VectorIndex& index)
return -1;
}
int numQuerys = (numDebugQuerys >= 0) ? numDebugQuerys : numBatchQuerys;
SizeType numQuerys = (numDebugQuerys >= 0) ? numDebugQuerys : numBatchQuerys;
std::vector<std::vector<T>> Query(numQuerys, std::vector<T>(index.GetFeatureDim(), 0));
std::vector<std::set<int>> truth(numQuerys);
std::vector<std::set<SizeType>> truth(numQuerys);
std::vector<QueryResult> results(numQuerys, QueryResult(NULL, K, 0));
int * latencies = new int[numQuerys + 1];
clock_t * latencies = new clock_t[numQuerys + 1];
int base = 1;
if (index.GetDistCalcMethod() == DistCalcMethod::Cosine) {
@ -114,7 +114,7 @@ int Process(Helper::IniReader& reader, VectorIndex& index)
}
int basesquare = base * base;
int dims = index.GetFeatureDim();
DimensionType dims = index.GetFeatureDim();
std::vector<std::string> QStrings;
while (!inStream.eof())
{
@ -122,43 +122,33 @@ int Process(Helper::IniReader& reader, VectorIndex& index)
COMMON::Utils::PrepareQuerys(inStream, QStrings, Query, numQuerys, dims, index.GetDistCalcMethod(), base);
if (numQuerys == 0) break;
for (int i = 0; i < numQuerys; i++) results[i].SetTarget(Query[i].data());
for (SizeType i = 0; i < numQuerys; i++) results[i].SetTarget(Query[i].data());
if (ftruth.is_open()) LoadTruth(ftruth, truth, numQuerys, K);
std::cout << " \t[avg] \t[99%] \t[95%] \t[recall] \t[mem]" << std::endl;
int subSize = (numQuerys - 1) / index.GetNumThreads() + 1;
SizeType subSize = (numQuerys - 1) / omp_get_num_threads() + 1;
for (std::string& mc : maxCheck)
{
index.SetParameter("MaxCheck", mc.c_str());
for (int i = 0; i < numQuerys; i++) results[i].Reset();
for (SizeType i = 0; i < numQuerys; i++) results[i].Reset();
if (index.GetNumThreads() == 1)
#pragma omp parallel for
for (int tid = 0; tid < omp_get_num_threads(); tid++)
{
for (int i = 0; i < numQuerys; i++)
SizeType start = tid * subSize;
SizeType end = min((tid + 1) * subSize, numQuerys);
for (SizeType i = start; i < end; i++)
{
latencies[i] = clock();
index.SearchIndex(results[i]);
}
}
else
{
#pragma omp parallel for
for (int tid = 0; tid < index.GetNumThreads(); tid++)
{
int start = tid * subSize;
int end = min((tid + 1) * subSize, numQuerys);
for (int i = start; i < end; i++)
{
latencies[i] = clock();
index.SearchIndex(results[i]);
}
}
}
latencies[numQuerys] = clock();
float timeMean = 0, timeMin = MaxDist, timeMax = 0, timeStd = 0;
for (int i = 0; i < numQuerys; i++)
for (SizeType i = 0; i < numQuerys; i++)
{
if (latencies[i + 1] >= latencies[i])
latencies[i] = latencies[i + 1] - latencies[i];
@ -169,16 +159,16 @@ int Process(Helper::IniReader& reader, VectorIndex& index)
if (latencies[i] < timeMin) timeMin = (float)latencies[i];
}
timeMean /= numQuerys;
for (int i = 0; i < numQuerys; i++) timeStd += ((float)latencies[i] - timeMean) * ((float)latencies[i] - timeMean);
for (SizeType i = 0; i < numQuerys; i++) timeStd += ((float)latencies[i] - timeMean) * ((float)latencies[i] - timeMean);
timeStd = std::sqrt(timeStd / numQuerys);
log << timeMean << " " << timeStd << " " << timeMin << " " << timeMax << " ";
std::sort(latencies, latencies + numQuerys, [](int x, int y)
std::sort(latencies, latencies + numQuerys, [](clock_t x, clock_t y)
{
return x < y;
});
float l99 = float(latencies[int(numQuerys * 0.99)]) / CLOCKS_PER_SEC;
float l95 = float(latencies[int(numQuerys * 0.95)]) / CLOCKS_PER_SEC;
float l99 = float(latencies[SizeType(numQuerys * 0.99)]) / CLOCKS_PER_SEC;
float l95 = float(latencies[SizeType(numQuerys * 0.95)]) / CLOCKS_PER_SEC;
float recall = 0;
if (ftruth.is_open())
@ -202,7 +192,7 @@ int Process(Helper::IniReader& reader, VectorIndex& index)
if (fp.is_open())
{
fp << std::setprecision(3) << std::fixed;
for (int i = 0; i < numQuerys; i++)
for (SizeType i = 0; i < numQuerys; i++)
{
fp << QStrings[i] << ":";
for (int j = 0; j < K; j++)
@ -258,13 +248,13 @@ int main(int argc, char** argv)
{
std::string param(argv[i]);
size_t idx = param.find("=");
if (idx < 0) continue;
if (idx == std::string::npos) continue;
std::string paramName = param.substr(0, idx);
std::string paramVal = param.substr(idx + 1);
std::string sectionName;
idx = paramName.find(".");
if (idx >= 0) {
if (idx != std::string::npos) {
sectionName = paramName.substr(0, idx);
paramName = paramName.substr(idx + 1);
}

View File

@ -114,7 +114,7 @@ SearchService::Run()
void
SearchService::RunSocketMode()
{
auto threadNum = max((unsigned int)1, m_serviceContext->GetServiceSettings()->m_threadNum);
auto threadNum = max((SizeType)1, m_serviceContext->GetServiceSettings()->m_threadNum);
m_threadPool.reset(new boost::asio::thread_pool(threadNum));
Socket::PacketHandlerMapPtr handlerMap(new Socket::PacketHandlerMap);
@ -161,7 +161,7 @@ SearchService::RunInteractiveMode()
std::unique_ptr<char[]> inputBuffer(new char[bufferSize]);
while (true)
{
fprintf(stdout, "Query: ");
std::cout << "Query: ";
if (!fgets(inputBuffer.get(), bufferSize, stdin))
{
break;
@ -169,29 +169,28 @@ SearchService::RunInteractiveMode()
auto callback = [](std::shared_ptr<SearchExecutionContext> p_exeContext)
{
fprintf(stdout, "Result:\n");
std::cout << "Result:" << std::endl;
if (nullptr == p_exeContext)
{
fprintf(stdout, "Not Executed.\n");
std::cout << "Not Executed." << std::endl;
return;
}
const auto& results = p_exeContext->GetResults();
for (const auto& result : results)
{
fprintf(stdout, "Index: %s\n", result.m_indexName.c_str());
std::cout << "Index: " << result.m_indexName << std::endl;
int idx = 0;
for (const auto& res : result.m_results)
{
fprintf(stdout, "------------------\n");
fprintf(stdout, "DocIndex: %d Distance: %f", res.VID, res.Dist);
std::cout << "------------------" << std::endl;
std::cout << "DocIndex: " << res.VID << " Distance: " << res.Dist;
if (result.m_results.WithMeta())
{
const auto& metadata = result.m_results.GetMetadata(idx);
fprintf(stdout, " MetaData: %.*s", static_cast<int>(metadata.Length()), metadata.Data());
std::cout << " MetaData: " << std::string((char*)metadata.Data(), metadata.Length());
}
fprintf(stdout, "\n");
std::cout << std::endl;
++idx;
}
}

View File

@ -19,12 +19,12 @@ if(NOT WIN32)
endif()
if(${CMAKE_CXX_COMPILER_ID} STREQUAL "GNU")
# require at least gcc 4.7
if (CXX_COMPILER_VERSION VERSION_LESS 4.7)
message(FATAL_ERROR "GCC version must be at least 4.7!")
# require at least gcc 5.0
if (CXX_COMPILER_VERSION VERSION_LESS 5.0)
message(FATAL_ERROR "GCC version must be at least 5.0!")
endif()
set (CMAKE_CXX_FLAGS_RELEASE "-Wall -Wunreachable-code -Wno-reorder -Wno-sign-compare -Wno-unknown-pragmas -Wcast-align -lm -lrt -DNDEBUG -std=c++11 -fopenmp -march=native")
set (CMAKE_CXX_FLAGS_DEBUG "-Wall -Wunreachable-code -Wno-reorder -Wno-sign-compare -Wno-unknown-pragmas -Wcast-align -ggdb -lm -lrt -DNDEBUG -std=c++11 -fopenmp -march=native")
set (CMAKE_CXX_FLAGS_RELEASE "-Wall -Wunreachable-code -Wno-reorder -Wno-sign-compare -Wno-unknown-pragmas -Wcast-align -lm -lrt -DNDEBUG -std=c++14 -fopenmp -march=native")
set (CMAKE_CXX_FLAGS_DEBUG "-Wall -Wunreachable-code -Wno-reorder -Wno-sign-compare -Wno-unknown-pragmas -Wcast-align -ggdb -lm -lrt -DNDEBUG -std=c++14 -fopenmp -march=native")
elseif(WIN32)
if(NOT MSVC14)
message(FATAL_ERROR "On Windows, only MSVC version 14 are supported!")
@ -74,54 +74,18 @@ else()
message (FATAL_ERROR "Could no find openmp!")
endif()
#find_package(Boost 1.67 COMPONENTS system thread serialization wserialization regex)
#if (Boost_FOUND)
# include_directories (${Boost_INCLUDE_DIR})
# link_directories (${Boost_LIBRARY_DIR} "/usr/lib")
# message (STATUS "Found Boost.")
# message (STATUS "Include Path: ${Boost_INCLUDE_DIRS}")
# message (STATUS "Library Path: ${Boost_LIBRARY_DIRS}")
# message (STATUS "Library: ${Boost_LIBRARIES}")
#else()
# message (FATAL_ERROR "Could not find Boost 1.67!")
#endif()
#set(Boost_LIBRARIES
# boost_system_static
# boost_filesystem_static
# boost_serialization_static
# boost_wserialization_static
# boost_regex_static
# boost_thread_static)
set(TBB_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../tbb)
if (WIN32)
if (${CMAKE_SIZEOF_VOID_P} EQUAL "8")
set (TBB_LIBRARY_SUFFIX "lib/intel64/vc14")
else()
set (TBB_LIBRARY_SUFFIX "lib/ia32/vc14")
endif()
find_path(TBB_INCLUDE_DIRS tbb/tbb.h HINTS ${TBB_DIR} "C:/Program Files/Intel/TBB" PATH_SUFFIXES include)
find_library(TBB_LIBRARIES tbb${CMAKE_STATIC_LIBRARY_SUFFIX} HINTS ${TBB_DIR} "C:/Program Files/Intel/TBB" PATH_SUFFIXES ${TBB_LIBRARY_SUFFIX})
find_package(Boost 1.67 COMPONENTS system thread serialization wserialization regex)
if (Boost_FOUND)
include_directories (${Boost_INCLUDE_DIR})
link_directories (${Boost_LIBRARY_DIR} "/usr/lib")
message (STATUS "Found Boost.")
message (STATUS "Include Path: ${Boost_INCLUDE_DIRS}")
message (STATUS "Library Path: ${Boost_LIBRARY_DIRS}")
message (STATUS "Library: ${Boost_LIBRARIES}")
else()
find_path(TBB_INCLUDE_DIRS tbb/tbb.h HINTS ${TBB_DIR} "/usr/" PATH_SUFFIXES include)
find_library(TBB_LIBRARIES libtbb${CMAKE_SHARED_LIBRARY_SUFFIX} HINTS ${TBB_DIR} "/usr/")
endif()
set(TBB_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../tbb)
find_path(TBB_INCLUDE_DIRS tbb/tbb.h HINTS ${TBB_DIR} "/usr/" PATH_SUFFIXES include)
find_library(TBB_LIBRARIES libtbb${CMAKE_SHARED_LIBRARY_SUFFIX} HINTS ${TBB_DIR} "/usr/")
if (TBB_INCLUDE_DIRS AND TBB_LIBRARIES)
include_directories (${TBB_INCLUDE_DIRS})
message (STATUS "Found TBB.")
message (STATUS "Include Path:" ${TBB_INCLUDE_DIRS})
message (STATUS "Library:" ${TBB_LIBRARIES})
else()
message (FATAL_ERROR "Could not find TBB!")
message (FATAL_ERROR "Could not find Boost 1.67!")
endif()
add_subdirectory (AnnService)
#add_subdirectory (Wrappers)
#add_subdirectory (Test)
add_subdirectory (Wrappers)
add_subdirectory (Test)

View File

@ -6,7 +6,7 @@ COPY AnnService ./AnnService/
COPY Test ./Test/
COPY Wrappers ./Wrappers/
RUN apt-get update && apt-get -y install wget build-essential libtbb-dev \
RUN apt-get update && apt-get -y install wget build-essential \
# remove the following if you don't want to build the wrappers
openjdk-8-jdk python3-pip swig

View File

@ -43,7 +43,6 @@ The searches in the trees and the graph are iteratively conducted.
* swig >= 3.0
* cmake >= 3.12.0
* boost >= 1.67.0
* tbb >= 4.2
### **Install**
@ -66,7 +65,7 @@ Compiling the ALL_BUILD project in the Visual Studio (at least 2015) will genera
```bash
docker build -t sptag .
```
Will build a docker container with binaries in `/app/Release/`
Will build a docker container with binaries in `/app/Release/`.
### **Verify**
@ -75,6 +74,7 @@ Run the test (or Test.exe) in the Release folder to verify all the tests have pa
### **Usage**
The detailed usage can be found in [Get started](docs/GettingStart.md).
Detailed parameter tuning guidance can be found in [Parameters](docs/Parameters.md).
## **References**
Please cite SPTAG in your publications if it helps your research:

View File

@ -1,4 +1,3 @@

Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio 14
VisualStudioVersion = 14.0.25420.1
@ -66,6 +65,22 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "JavaClient", "Wrappers\Java
{C2BC5FDE-C853-4F3D-B7E4-2C9B5524DDF9} = {C2BC5FDE-C853-4F3D-B7E4-2C9B5524DDF9}
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "CsharpCore", "Wrappers\CsharpCore.vcxproj", "{1896C009-AD46-4A70-B83C-4652A7F37503}"
ProjectSection(ProjectDependencies) = postProject
{C2BC5FDE-C853-4F3D-B7E4-2C9B5524DDF9} = {C2BC5FDE-C853-4F3D-B7E4-2C9B5524DDF9}
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "CsharpClient", "Wrappers\CsharpClient.vcxproj", "{363BA3BB-75C4-4CC7-AECB-28C7534B3710}"
ProjectSection(ProjectDependencies) = postProject
{F9A72303-6381-4C80-86FF-606A2F6F7B96} = {F9A72303-6381-4C80-86FF-606A2F6F7B96}
{C2BC5FDE-C853-4F3D-B7E4-2C9B5524DDF9} = {C2BC5FDE-C853-4F3D-B7E4-2C9B5524DDF9}
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "CLRCore", "Wrappers\CLRCore.vcxproj", "{38ACBA6C-2E50-44D4-9A6D-DC735B56E38F}"
ProjectSection(ProjectDependencies) = postProject
{C2BC5FDE-C853-4F3D-B7E4-2C9B5524DDF9} = {C2BC5FDE-C853-4F3D-B7E4-2C9B5524DDF9}
EndProjectSection
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|x64 = Debug|x64
@ -162,6 +177,30 @@ Global
{8866BF98-AA2E-450F-9F33-083E007CCA74}.Debug|x86.ActiveCfg = Debug|Win32
{8866BF98-AA2E-450F-9F33-083E007CCA74}.Release|x64.ActiveCfg = Release|x64
{8866BF98-AA2E-450F-9F33-083E007CCA74}.Release|x86.ActiveCfg = Release|Win32
{1896C009-AD46-4A70-B83C-4652A7F37503}.Debug|x64.ActiveCfg = Debug|x64
{1896C009-AD46-4A70-B83C-4652A7F37503}.Debug|x64.Build.0 = Debug|x64
{1896C009-AD46-4A70-B83C-4652A7F37503}.Debug|x86.ActiveCfg = Debug|Win32
{1896C009-AD46-4A70-B83C-4652A7F37503}.Debug|x86.Build.0 = Debug|Win32
{1896C009-AD46-4A70-B83C-4652A7F37503}.Release|x64.ActiveCfg = Release|x64
{1896C009-AD46-4A70-B83C-4652A7F37503}.Release|x64.Build.0 = Release|x64
{1896C009-AD46-4A70-B83C-4652A7F37503}.Release|x86.ActiveCfg = Release|Win32
{1896C009-AD46-4A70-B83C-4652A7F37503}.Release|x86.Build.0 = Release|Win32
{363BA3BB-75C4-4CC7-AECB-28C7534B3710}.Debug|x64.ActiveCfg = Debug|x64
{363BA3BB-75C4-4CC7-AECB-28C7534B3710}.Debug|x64.Build.0 = Debug|x64
{363BA3BB-75C4-4CC7-AECB-28C7534B3710}.Debug|x86.ActiveCfg = Debug|Win32
{363BA3BB-75C4-4CC7-AECB-28C7534B3710}.Debug|x86.Build.0 = Debug|Win32
{363BA3BB-75C4-4CC7-AECB-28C7534B3710}.Release|x64.ActiveCfg = Release|x64
{363BA3BB-75C4-4CC7-AECB-28C7534B3710}.Release|x64.Build.0 = Release|x64
{363BA3BB-75C4-4CC7-AECB-28C7534B3710}.Release|x86.ActiveCfg = Release|Win32
{363BA3BB-75C4-4CC7-AECB-28C7534B3710}.Release|x86.Build.0 = Release|Win32
{38ACBA6C-2E50-44D4-9A6D-DC735B56E38F}.Debug|x64.ActiveCfg = Debug|x64
{38ACBA6C-2E50-44D4-9A6D-DC735B56E38F}.Debug|x64.Build.0 = Debug|x64
{38ACBA6C-2E50-44D4-9A6D-DC735B56E38F}.Debug|x86.ActiveCfg = Debug|Win32
{38ACBA6C-2E50-44D4-9A6D-DC735B56E38F}.Debug|x86.Build.0 = Debug|Win32
{38ACBA6C-2E50-44D4-9A6D-DC735B56E38F}.Release|x64.ActiveCfg = Release|x64
{38ACBA6C-2E50-44D4-9A6D-DC735B56E38F}.Release|x64.Build.0 = Release|x64
{38ACBA6C-2E50-44D4-9A6D-DC735B56E38F}.Release|x86.ActiveCfg = Release|Win32
{38ACBA6C-2E50-44D4-9A6D-DC735B56E38F}.Release|x86.Build.0 = Release|Win32
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE

View File

@ -23,7 +23,7 @@ include_directories(${PYTHON_INCLUDE_PATH} ${PROJECT_SOURCE_DIR}/AnnService ${PR
file(GLOB TEST_HDR_FILES ${PROJECT_SOURCE_DIR}/Test/inc/Test.h)
file(GLOB TEST_SRC_FILES ${PROJECT_SOURCE_DIR}/Test/src/*.cpp)
add_executable (test ${TEST_SRC_FILES} ${TEST_HDR_FILES})
target_link_libraries(test SPTAGLib ${Boost_LIBRARIES} ${TBB_LIBRARIES})
target_link_libraries(test SPTAGLib ${Boost_LIBRARIES})
install(TARGETS test
RUNTIME DESTINATION bin

View File

@ -166,8 +166,6 @@
<Import Project="..\packages\boost_thread-vc140.1.67.0.0\build\boost_thread-vc140.targets" Condition="Exists('..\packages\boost_thread-vc140.1.67.0.0\build\boost_thread-vc140.targets')" />
<Import Project="..\packages\boost_wserialization-vc140.1.67.0.0\build\boost_wserialization-vc140.targets" Condition="Exists('..\packages\boost_wserialization-vc140.1.67.0.0\build\boost_wserialization-vc140.targets')" />
<Import Project="..\packages\boost_unit_test_framework-vc140.1.67.0.0\build\boost_unit_test_framework-vc140.targets" Condition="Exists('..\packages\boost_unit_test_framework-vc140.1.67.0.0\build\boost_unit_test_framework-vc140.targets')" />
<Import Project="..\packages\tbb_oss.redist.9.107.0.0\build\native\tbb_oss.redist.targets" Condition="Exists('..\packages\tbb_oss.redist.9.107.0.0\build\native\tbb_oss.redist.targets')" />
<Import Project="..\packages\tbb_oss.9.107.0.0\build\native\tbb_oss.targets" Condition="Exists('..\packages\tbb_oss.9.107.0.0\build\native\tbb_oss.targets')" />
</ImportGroup>
<Target Name="EnsureNuGetPackageBuildImports" BeforeTargets="PrepareForBuild">
<PropertyGroup>
@ -181,7 +179,5 @@
<Error Condition="!Exists('..\packages\boost_thread-vc140.1.67.0.0\build\boost_thread-vc140.targets')" Text="$([System.String]::Format('$(ErrorText)', '..\packages\boost_thread-vc140.1.67.0.0\build\boost_thread-vc140.targets'))" />
<Error Condition="!Exists('..\packages\boost_wserialization-vc140.1.67.0.0\build\boost_wserialization-vc140.targets')" Text="$([System.String]::Format('$(ErrorText)', '..\packages\boost_wserialization-vc140.1.67.0.0\build\boost_wserialization-vc140.targets'))" />
<Error Condition="!Exists('..\packages\boost_unit_test_framework-vc140.1.67.0.0\build\boost_unit_test_framework-vc140.targets')" Text="$([System.String]::Format('$(ErrorText)', '..\packages\boost_unit_test_framework-vc140.1.67.0.0\build\boost_unit_test_framework-vc140.targets'))" />
<Error Condition="!Exists('..\packages\tbb_oss.redist.9.107.0.0\build\native\tbb_oss.redist.targets')" Text="$([System.String]::Format('$(ErrorText)', '..\packages\tbb_oss.redist.9.107.0.0\build\native\tbb_oss.redist.targets'))" />
<Error Condition="!Exists('..\packages\tbb_oss.9.107.0.0\build\native\tbb_oss.targets')" Text="$([System.String]::Format('$(ErrorText)', '..\packages\tbb_oss.9.107.0.0\build\native\tbb_oss.targets'))" />
</Target>
</Project>

View File

@ -2,6 +2,4 @@
<packages>
<package id="boost" version="1.67.0.0" targetFramework="native" />
<package id="boost_unit_test_framework-vc140" version="1.67.0.0" targetFramework="native" />
<package id="tbb_oss" version="9.107.0.0" targetFramework="native" />
<package id="tbb_oss.redist" version="9.107.0.0" targetFramework="native" />
</packages>

View File

@ -5,118 +5,143 @@
#include "inc/Helper/SimpleIniReader.h"
#include "inc/Core/VectorIndex.h"
#include <unordered_set>
template <typename T>
void Build(SPTAG::IndexAlgoType algo, std::string distCalcMethod, T* vec, int n, int m)
void Build(SPTAG::IndexAlgoType algo, std::string distCalcMethod, std::shared_ptr<SPTAG::VectorSet>& vec, std::shared_ptr<SPTAG::MetadataSet>& meta, const std::string out)
{
std::vector<char> meta;
std::vector<long long> metaoffset;
for (int i = 0; i < n; i++) {
metaoffset.push_back(meta.size());
std::string a = std::to_string(i);
for (int j = 0; j < a.length(); j++)
meta.push_back(a[j]);
}
metaoffset.push_back(meta.size());
std::shared_ptr<SPTAG::VectorSet> vecset(new SPTAG::BasicVectorSet(
SPTAG::ByteArray((std::uint8_t*)vec, n * m * sizeof(T), false),
SPTAG::GetEnumValueType<T>(), m, n));
std::shared_ptr<SPTAG::MetadataSet> metaset(new SPTAG::MemMetadataSet(
SPTAG::ByteArray((std::uint8_t*)meta.data(), meta.size() * sizeof(char), false),
SPTAG::ByteArray((std::uint8_t*)metaoffset.data(), metaoffset.size() * sizeof(long long), false),
n));
std::shared_ptr<SPTAG::VectorIndex> vecIndex = SPTAG::VectorIndex::CreateInstance(algo, SPTAG::GetEnumValueType<T>());
vecIndex->SetParameter("DistCalcMethod", distCalcMethod);
BOOST_CHECK(nullptr != vecIndex);
BOOST_CHECK(SPTAG::ErrorCode::Success == vecIndex->BuildIndex(vecset, metaset));
BOOST_CHECK(SPTAG::ErrorCode::Success == vecIndex->SaveIndex("origindices"));
vecIndex->SetParameter("DistCalcMethod", distCalcMethod);
BOOST_CHECK(SPTAG::ErrorCode::Success == vecIndex->BuildIndex(vec, meta));
BOOST_CHECK(SPTAG::ErrorCode::Success == vecIndex->SaveIndex(out));
}
template <typename T>
void Search(std::string folder, T* vec, int k)
void BuildWithMetaMapping(SPTAG::IndexAlgoType algo, std::string distCalcMethod, std::shared_ptr<SPTAG::VectorSet>& vec, std::shared_ptr<SPTAG::MetadataSet>& meta, const std::string out)
{
std::shared_ptr<SPTAG::VectorIndex> vecIndex = SPTAG::VectorIndex::CreateInstance(algo, SPTAG::GetEnumValueType<T>());
BOOST_CHECK(nullptr != vecIndex);
vecIndex->SetParameter("DistCalcMethod", distCalcMethod);
BOOST_CHECK(SPTAG::ErrorCode::Success == vecIndex->BuildIndex(vec, meta, true));
BOOST_CHECK(SPTAG::ErrorCode::Success == vecIndex->SaveIndex(out));
}
template <typename T>
void Search(const std::string folder, T* vec, SPTAG::SizeType n, int k, std::string* truthmeta)
{
std::shared_ptr<SPTAG::VectorIndex> vecIndex;
BOOST_CHECK(SPTAG::ErrorCode::Success == SPTAG::VectorIndex::LoadIndex(folder, vecIndex));
BOOST_CHECK(nullptr != vecIndex);
SPTAG::QueryResult res(vec, k, true);
vecIndex->SearchIndex(res);
for (int i = 0; i < k; i++) {
std::cout << res.GetResult(i)->Dist << "@(" << res.GetResult(i)->VID << "," << std::string((char*)res.GetMetadata(i).Data(), res.GetMetadata(i).Length()) << ") ";
for (SPTAG::SizeType i = 0; i < n; i++)
{
SPTAG::QueryResult res(vec, k, true);
vecIndex->SearchIndex(res);
std::unordered_set<std::string> resmeta;
for (int j = 0; j < k; j++)
{
resmeta.insert(std::string((char*)res.GetMetadata(j).Data(), res.GetMetadata(j).Length()));
std::cout << res.GetResult(j)->Dist << "@(" << res.GetResult(j)->VID << "," << std::string((char*)res.GetMetadata(j).Data(), res.GetMetadata(j).Length()) << ") ";
}
std::cout << std::endl;
for (int j = 0; j < k; j++)
{
BOOST_CHECK(resmeta.find(truthmeta[i * k + j]) != resmeta.end());
}
vec += vecIndex->GetFeatureDim();
}
std::cout << std::endl;
vecIndex.reset();
}
template <typename T>
void Add(T* vec, int n)
void Add(const std::string folder, std::shared_ptr<SPTAG::VectorSet>& vec, std::shared_ptr<SPTAG::MetadataSet>& meta, const std::string out)
{
std::shared_ptr<SPTAG::VectorIndex> vecIndex;
BOOST_CHECK(SPTAG::ErrorCode::Success == SPTAG::VectorIndex::LoadIndex("origindices", vecIndex));
BOOST_CHECK(SPTAG::ErrorCode::Success == SPTAG::VectorIndex::LoadIndex(folder, vecIndex));
BOOST_CHECK(nullptr != vecIndex);
std::vector<char> meta;
std::vector<long long> metaoffset;
for (int i = 0; i < n; i++) {
metaoffset.push_back(meta.size());
std::string a = std::to_string(vecIndex->GetNumSamples() + i);
for (int j = 0; j < a.length(); j++)
meta.push_back(a[j]);
}
metaoffset.push_back(meta.size());
int m = vecIndex->GetFeatureDim();
std::shared_ptr<SPTAG::VectorSet> vecset(new SPTAG::BasicVectorSet(
SPTAG::ByteArray((std::uint8_t*)vec, n * m * sizeof(T), false),
SPTAG::GetEnumValueType<T>(), m, n));
std::shared_ptr<SPTAG::MetadataSet> metaset(new SPTAG::MemMetadataSet(
SPTAG::ByteArray((std::uint8_t*)meta.data(), meta.size() * sizeof(char), false),
SPTAG::ByteArray((std::uint8_t*)metaoffset.data(), metaoffset.size() * sizeof(long long), false),
n));
BOOST_CHECK(SPTAG::ErrorCode::Success == vecIndex->AddIndex(vecset, metaset));
BOOST_CHECK(SPTAG::ErrorCode::Success == vecIndex->SaveIndex("addindices"));
BOOST_CHECK(SPTAG::ErrorCode::Success == vecIndex->AddIndex(vec, meta));
BOOST_CHECK(SPTAG::ErrorCode::Success == vecIndex->SaveIndex(out));
vecIndex.reset();
}
template <typename T>
void Delete(T* vec, int n)
void Delete(const std::string folder, T* vec, SPTAG::SizeType n, const std::string out)
{
std::shared_ptr<SPTAG::VectorIndex> vecIndex;
BOOST_CHECK(SPTAG::ErrorCode::Success == SPTAG::VectorIndex::LoadIndex("addindices", vecIndex));
BOOST_CHECK(SPTAG::ErrorCode::Success == SPTAG::VectorIndex::LoadIndex(folder, vecIndex));
BOOST_CHECK(nullptr != vecIndex);
BOOST_CHECK(SPTAG::ErrorCode::Success == vecIndex->DeleteIndex((const void*)vec, n));
BOOST_CHECK(SPTAG::ErrorCode::Success == vecIndex->SaveIndex("delindices"));
BOOST_CHECK(SPTAG::ErrorCode::Success == vecIndex->SaveIndex(out));
vecIndex.reset();
}
template <typename T>
void Test(SPTAG::IndexAlgoType algo, std::string distCalcMethod)
{
int n = 100, q = 3, m = 10, k = 3;
SPTAG::SizeType n = 100, q = 3;
SPTAG::DimensionType m = 10;
int k = 3;
std::vector<T> vec;
for (int i = 0; i < n; i++) {
for (int j = 0; j < m; j++) {
for (SPTAG::SizeType i = 0; i < n; i++) {
for (SPTAG::DimensionType j = 0; j < m; j++) {
vec.push_back((T)i);
}
}
std::vector<T> query;
for (int i = 0; i < q; i++) {
for (int j = 0; j < m; j++) {
for (SPTAG::SizeType i = 0; i < q; i++) {
for (SPTAG::DimensionType j = 0; j < m; j++) {
query.push_back((T)i*2);
}
}
Build<T>(algo, distCalcMethod, vec.data(), n, m);
Search<T>("origindices", query.data(), k);
Add<T>(query.data(), q);
Search<T>("addindices", query.data(), k);
Delete<T>(query.data(), q);
Search<T>("delindices", query.data(), k);
std::vector<char> meta;
std::vector<std::uint64_t> metaoffset;
for (SPTAG::SizeType i = 0; i < n; i++) {
metaoffset.push_back((std::uint64_t)meta.size());
std::string a = std::to_string(i);
for (size_t j = 0; j < a.length(); j++)
meta.push_back(a[j]);
}
metaoffset.push_back((std::uint64_t)meta.size());
std::shared_ptr<SPTAG::VectorSet> vecset(new SPTAG::BasicVectorSet(
SPTAG::ByteArray((std::uint8_t*)vec.data(), sizeof(T) * n * m, false),
SPTAG::GetEnumValueType<T>(), m, n));
std::shared_ptr<SPTAG::MetadataSet> metaset(new SPTAG::MemMetadataSet(
SPTAG::ByteArray((std::uint8_t*)meta.data(), meta.size() * sizeof(char), false),
SPTAG::ByteArray((std::uint8_t*)metaoffset.data(), metaoffset.size() * sizeof(std::uint64_t), false),
n));
Build<T>(algo, distCalcMethod, vecset, metaset, "testindices");
std::string truthmeta1[] = { "0", "1", "2", "2", "1", "3", "4", "3", "5" };
Search<T>("testindices", query.data(), q, k, truthmeta1);
Add<T>("testindices", vecset, metaset, "testindices");
std::string truthmeta2[] = { "0", "0", "1", "2", "2", "1", "4", "4", "3" };
Search<T>("testindices", query.data(), q, k, truthmeta2);
Delete<T>("testindices", query.data(), q, "testindices");
std::string truthmeta3[] = { "1", "1", "3", "1", "3", "1", "3", "5", "3" };
Search<T>("testindices", query.data(), q, k, truthmeta3);
BuildWithMetaMapping<T>(algo, distCalcMethod, vecset, metaset, "testindices");
std::string truthmeta4[] = { "0", "1", "2", "2", "1", "3", "4", "3", "5" };
Search<T>("testindices", query.data(), q, k, truthmeta4);
Add<T>("testindices", vecset, metaset, "testindices");
std::string truthmeta5[] = { "0", "1", "2", "2", "1", "3", "4", "3", "5" };
Search<T>("testindices", query.data(), q, k, truthmeta5);
}
BOOST_AUTO_TEST_SUITE (AlgoTest)

View File

@ -6,7 +6,7 @@
#include "inc/Core/Common/DistanceUtils.h"
template<typename T>
static float ComputeCosineDistance(const T *pX, const T *pY, int length) {
static float ComputeCosineDistance(const T *pX, const T *pY, SPTAG::DimensionType length) {
float diff = 0;
const T* pEnd1 = pX + length;
while (pX < pEnd1) diff += (*pX++) * (*pY++);
@ -14,7 +14,7 @@ static float ComputeCosineDistance(const T *pX, const T *pY, int length) {
}
template<typename T>
static float ComputeL2Distance(const T *pX, const T *pY, int length)
static float ComputeL2Distance(const T *pX, const T *pY, SPTAG::DimensionType length)
{
float diff = 0;
const T* pEnd1 = pX + length;
@ -32,10 +32,10 @@ T random(int high = RAND_MAX, int low = 0) // Generates a random value.
template<typename T>
void test(int high) {
int dimension = random<int>(256, 2);
SPTAG::DimensionType dimension = random<SPTAG::DimensionType>(256, 2);
T *X = new T[dimension], *Y = new T[dimension];
BOOST_ASSERT(X != nullptr && Y != nullptr);
for (int i = 0; i < dimension; i++) {
for (SPTAG::DimensionType i = 0; i < dimension; i++) {
X[i] = random<T>(high, -high);
Y[i] = random<T>(high, -high);
}

View File

@ -1,79 +0,0 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
#include "inc/Test.h"
#include "inc/Helper/SimpleIniReader.h"
#include "inc/Core/VectorIndex.h"
// Round-trip check for in-memory index serialization.
//
// Builds an index of type `algo` over n synthetic vectors of dimension m
// (vector i has every component equal to i), serializes it with
// SaveIndexToMemory, deep-copies every returned blob, loads the copies into a
// freshly created index via LoadIndexFromMemory, and runs one k-NN search on
// the reloaded index, printing the results.
//
// algo            index algorithm to instantiate.
// distCalcMethod  value passed verbatim to the "DistCalcMethod" parameter.
template<typename T>
void Test(SPTAG::IndexAlgoType algo, std::string distCalcMethod) {
    int n = 100, q = 3, m = 10, k = 3;

    std::vector<T> vec;
    for (int i = 0; i < n; i++) {
        for (int j = 0; j < m; j++) {
            vec.push_back((T) i);
        }
    }

    // NOTE(review): `query` is populated but the search below is issued with
    // vec.data() (i.e. the first base vector) — confirm whether that is intended.
    std::vector<T> query;
    for (int i = 0; i < q; i++) {
        for (int j = 0; j < m; j++) {
            query.push_back((T) i * 2);
        }
    }

    std::shared_ptr<SPTAG::VectorSet> vecset(new SPTAG::BasicVectorSet(
        SPTAG::ByteArray((std::uint8_t *) vec.data(), n * m * sizeof(T), false),
        SPTAG::GetEnumValueType<T>(), m, n));

    std::vector<void *> blobs;
    std::vector<int64_t> len;
    {
        std::shared_ptr<SPTAG::VectorIndex> vecIndex =
            SPTAG::VectorIndex::CreateInstance(algo, SPTAG::GetEnumValueType<T>());
        // Abort this test case instead of dereferencing a null index.
        BOOST_REQUIRE(nullptr != vecIndex);
        vecIndex->SetParameter("DistCalcMethod", distCalcMethod);
        BOOST_CHECK(SPTAG::ErrorCode::Success == vecIndex->BuildIndex(vecset, nullptr));
        BOOST_CHECK(SPTAG::ErrorCode::Success == vecIndex->SaveIndexToMemory(blobs, len));
    }

    // Deep-copy each blob so the reloaded index does not share buffers with
    // the ones produced by SaveIndexToMemory.
    std::vector<void *> clone_blobs;
    std::vector<int64_t> clone_len;
    for (std::size_t i = 0; i < blobs.size(); ++i) {
        void *mem = malloc(len[i]);
        // BOOST_CHECK would only record the failure and fall through to
        // memcpy on a null pointer; REQUIRE stops the test case here.
        BOOST_REQUIRE(NULL != mem);
        memcpy(mem, blobs[i], len[i]);
        clone_blobs.push_back(mem);
        clone_len.push_back(len[i]);
    }

    std::shared_ptr<SPTAG::VectorIndex> clone_index =
        SPTAG::VectorIndex::CreateInstance(algo, SPTAG::GetEnumValueType<T>());
    BOOST_REQUIRE(nullptr != clone_index);
    clone_index->SetParameter("DistCalcMethod", distCalcMethod);
    BOOST_CHECK(SPTAG::ErrorCode::Success == clone_index->LoadIndexFromMemory(clone_blobs));

    SPTAG::QueryResult res(vec.data(), k, true);
    clone_index->SearchIndex(res);
    for (int i = 0; i < k; i++) {
        std::cout << res.GetResult(i)->Dist << "@(" << res.GetResult(i)->VID << ","
                  << std::string((char *) res.GetMetadata(i).Data(), res.GetMetadata(i).Length()) << ") ";
    }
    std::cout << std::endl;

    // Release both the original and the cloned serialization buffers.
    for (auto &blob : blobs)
        free(blob);
    for (auto &blob : clone_blobs)
        free(blob);
}
BOOST_AUTO_TEST_SUITE (SerializeTest)
BOOST_AUTO_TEST_CASE(KDTree) {
Test<float>(SPTAG::IndexAlgoType::KDT, "L2");
}
BOOST_AUTO_TEST_CASE(BKTree) {
Test<float>(SPTAG::IndexAlgoType::BKT, "L2");
}
BOOST_AUTO_TEST_SUITE_END()

View File

@ -0,0 +1,141 @@
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" ToolsVersion="14.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="Debug|Win32">
<Configuration>Debug</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|Win32">
<Configuration>Release</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Debug|x64">
<Configuration>Debug</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|x64">
<Configuration>Release</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
</ItemGroup>
<PropertyGroup Label="Globals">
<VCProjectVersion>15.0</VCProjectVersion>
<ProjectGuid>{38ACBA6C-2E50-44D4-9A6D-DC735B56E38F}</ProjectGuid>
<TargetFrameworkVersion>v4.5.2</TargetFrameworkVersion>
<Keyword>ManagedCProj</Keyword>
<RootNamespace>CLRCore</RootNamespace>
<WindowsTargetPlatformVersion>8.1</WindowsTargetPlatformVersion>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
<Import Project="$(SolutionDir)\AnnService.users.props" />
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
<ConfigurationType>DynamicLibrary</ConfigurationType>
<UseDebugLibraries>true</UseDebugLibraries>
<PlatformToolset>v140</PlatformToolset>
<CLRSupport>true</CLRSupport>
<CharacterSet>Unicode</CharacterSet>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
<ConfigurationType>DynamicLibrary</ConfigurationType>
<UseDebugLibraries>false</UseDebugLibraries>
<PlatformToolset>v140</PlatformToolset>
<CLRSupport>true</CLRSupport>
<CharacterSet>Unicode</CharacterSet>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
<ConfigurationType>DynamicLibrary</ConfigurationType>
<UseDebugLibraries>true</UseDebugLibraries>
<PlatformToolset>v140</PlatformToolset>
<CLRSupport>true</CLRSupport>
<CharacterSet>MultiByte</CharacterSet>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
<ConfigurationType>DynamicLibrary</ConfigurationType>
<UseDebugLibraries>false</UseDebugLibraries>
<PlatformToolset>v140</PlatformToolset>
<CLRSupport>true</CLRSupport>
<CharacterSet>MultiByte</CharacterSet>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings">
</ImportGroup>
<ImportGroup Label="Shared">
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<PropertyGroup>
<TargetName>Microsoft.ANN.SPTAGManaged</TargetName>
<TargetExt>.dll</TargetExt>
<IntDir>$(SolutionDir)obj\$(Platform)_$(Configuration)\$(ProjectName)\</IntDir>
<IncludePath>$(ProjectDir);$(SolutionDir)AnnService\;$(IncludePath)</IncludePath>
<LibraryPath>$(OutLibDir);$(LibraryPath)</LibraryPath>
<OutDir>$(OutAppDir)</OutDir>
</PropertyGroup>
<PropertyGroup Label="UserMacros" />
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<LinkIncremental>true</LinkIncremental>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<LinkIncremental>true</LinkIncremental>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<LinkIncremental>false</LinkIncremental>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<LinkIncremental>false</LinkIncremental>
</PropertyGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<Optimization>Disabled</Optimization>
<PreprocessorDefinitions>_DEBUG;_SCL_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<PrecompiledHeader>NotUsing</PrecompiledHeader>
<OpenMPSupport>true</OpenMPSupport>
</ClCompile>
<Link>
<AdditionalDependencies>CoreLibrary.lib;%(AdditionalDependencies)</AdditionalDependencies>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<PreprocessorDefinitions>NDEBUG;_SCL_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<PrecompiledHeader>NotUsing</PrecompiledHeader>
<OpenMPSupport>true</OpenMPSupport>
</ClCompile>
<Link>
<AdditionalDependencies>CoreLibrary.lib;%(AdditionalDependencies)</AdditionalDependencies>
</Link>
</ItemDefinitionGroup>
<ItemGroup>
<Reference Include="System" />
<Reference Include="System.Data" />
<Reference Include="System.Xml" />
</ItemGroup>
<ItemGroup>
<ClInclude Include="inc\CLRCoreInterface.h" />
<ClInclude Include="inc\ManagedObject.h" />
</ItemGroup>
<ItemGroup>
<ClCompile Include="src\AssemblyInfo.cpp" />
<ClCompile Include="src\CLRCoreInterface.cpp" />
</ItemGroup>
<ItemGroup>
<!-- <Project> must carry the GUID of the referenced project itself (the one the
     solution lists as this project's dependency), not the VC++ project-type GUID. -->
<ProjectReference Include="$(SolutionDir)AnnService\CoreLibrary.vcxproj">
<Project>{C2BC5FDE-C853-4F3D-B7E4-2C9B5524DDF9}</Project>
</ProjectReference>
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
</ImportGroup>
</Project>

View File

@ -0,0 +1,32 @@
<?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup>
<Filter Include="Source Files">
<UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
<Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
</Filter>
<Filter Include="Header Files">
<UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
<Extensions>h;hh;hpp;hxx;hm;inl;inc;xsd</Extensions>
</Filter>
<Filter Include="Resources">
<UniqueIdentifier>{ba4289c4-f872-4dbc-a57f-7b415614afb3}</UniqueIdentifier>
</Filter>
</ItemGroup>
<ItemGroup>
<ClInclude Include="inc\CLRCoreInterface.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="inc\ManagedObject.h">
<Filter>Header Files</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<ClCompile Include="src\CLRCoreInterface.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="src\AssemblyInfo.cpp">
<Filter>Source Files</Filter>
</ClCompile>
</ItemGroup>
</Project>

View File

@ -5,101 +5,167 @@ find_package(Python2 COMPONENTS Development)
if (Python2_FOUND)
include_directories (${Python2_INCLUDE_DIRS})
link_directories (${Python2_LIBRARY_DIRS})
set (Python_INCLUDE_DIRS ${Python2_INCLUDE_DIRS})
set (Python_INCLUDE_DIRS ${Python2_INCLUDE_DIRS})
set (Python_LIBRARIES ${Python2_LIBRARIES})
set (Python_FOUND true)
set (Python_FOUND true)
else()
find_package(Python3 COMPONENTS Development)
if (Python3_FOUND)
include_directories (${Python3_INCLUDE_DIRS})
link_directories (${Python3_LIBRARY_DIRS})
set (Python_INCLUDE_DIRS ${Python3_INCLUDE_DIRS})
set (Python_INCLUDE_DIRS ${Python3_INCLUDE_DIRS})
set (Python_LIBRARIES ${Python3_LIBRARIES})
set (Python_FOUND true)
set (Python_FOUND true)
endif()
endif()
if (Python_FOUND)
message (STATUS "Found Python.")
message (STATUS "Found Python.")
message (STATUS "Include Path: ${Python_INCLUDE_DIRS}")
message (STATUS "Library Path: ${Python_LIBRARIES}")
if (WIN32)
set(PY_SUFFIX .pyd)
else()
set(PY_SUFFIX .so)
endif()
execute_process(COMMAND swig -python -c++ -I${PROJECT_SOURCE_DIR}/Wrappers/inc -o ${PROJECT_SOURCE_DIR}/Wrappers/inc/CoreInterface_pwrap.cpp ${PROJECT_SOURCE_DIR}/Wrappers/inc/PythonCore.i)
execute_process(COMMAND swig -python -c++ -I${PROJECT_SOURCE_DIR}/Wrappers/inc -o ${PROJECT_SOURCE_DIR}/Wrappers/inc/ClientInterface_pwrap.cpp ${PROJECT_SOURCE_DIR}/Wrappers/inc/PythonClient.i)
if (WIN32)
set(PY_SUFFIX .pyd)
else()
set(PY_SUFFIX .so)
endif()
include_directories(${PYTHON_INCLUDE_PATH} ${PROJECT_SOURCE_DIR}/AnnService ${PROJECT_SOURCE_DIR}/Wrappers)
execute_process(COMMAND swig -python -c++ -I${PROJECT_SOURCE_DIR}/Wrappers/inc -o ${PROJECT_SOURCE_DIR}/Wrappers/inc/CoreInterface_pwrap.cpp ${PROJECT_SOURCE_DIR}/Wrappers/inc/PythonCore.i)
execute_process(COMMAND swig -python -c++ -I${PROJECT_SOURCE_DIR}/Wrappers/inc -o ${PROJECT_SOURCE_DIR}/Wrappers/inc/ClientInterface_pwrap.cpp ${PROJECT_SOURCE_DIR}/Wrappers/inc/PythonClient.i)
file(GLOB CORE_HDR_FILES ${PROJECT_SOURCE_DIR}/Wrappers/inc/CoreInterface.h)
file(GLOB CORE_SRC_FILES ${PROJECT_SOURCE_DIR}/Wrappers/src/CoreInterface.cpp ${PROJECT_SOURCE_DIR}/Wrappers/inc/CoreInterface_pwrap.cpp)
add_library (_SPTAG SHARED ${CORE_SRC_FILES} ${CORE_HDR_FILES})
set_target_properties(_SPTAG PROPERTIES PREFIX "" SUFFIX ${PY_SUFFIX})
target_link_libraries(_SPTAG SPTAGLib ${Python_LIBRARIES} ${TBB_LIBRARIES})
add_custom_command(TARGET _SPTAG POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy ${PROJECT_SOURCE_DIR}/Wrappers/inc/SPTAG.py ${EXECUTABLE_OUTPUT_PATH})
include_directories(${PYTHON_INCLUDE_PATH} ${PROJECT_SOURCE_DIR}/AnnService ${PROJECT_SOURCE_DIR}/Wrappers)
file(GLOB CLIENT_HDR_FILES ${PROJECT_SOURCE_DIR}/Wrappers/inc/ClientInterface.h ${PROJECT_SOURCE_DIR}/AnnService/inc/Socket/*.h ${PROJECT_SOURCE_DIR}/AnnService/inc/Client/*.h)
file(GLOB CLIENT_SRC_FILES ${PROJECT_SOURCE_DIR}/Wrappers/src/ClientInterface.cpp ${PROJECT_SOURCE_DIR}/AnnService/src/Socket/*.cpp ${PROJECT_SOURCE_DIR}/AnnService/src/Client/*.cpp ${PROJECT_SOURCE_DIR}/Wrappers/inc/ClientInterface_pwrap.cpp)
add_library (_SPTAGClient SHARED ${CLIENT_SRC_FILES} ${CLIENT_HDR_FILES})
set_target_properties(_SPTAGClient PROPERTIES PREFIX "" SUFFIX ${PY_SUFFIX})
target_link_libraries(_SPTAGClient SPTAGLib ${Python_LIBRARIES} ${Boost_LIBRARIES} ${TBB_LIBRARIES})
add_custom_command(TARGET _SPTAGClient POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy ${PROJECT_SOURCE_DIR}/Wrappers/inc/SPTAGClient.py ${EXECUTABLE_OUTPUT_PATH})
file(GLOB CORE_HDR_FILES ${PROJECT_SOURCE_DIR}/Wrappers/inc/CoreInterface.h)
file(GLOB CORE_SRC_FILES ${PROJECT_SOURCE_DIR}/Wrappers/src/CoreInterface.cpp ${PROJECT_SOURCE_DIR}/Wrappers/inc/CoreInterface_pwrap.cpp)
add_library (_SPTAG SHARED ${CORE_SRC_FILES} ${CORE_HDR_FILES})
set_target_properties(_SPTAG PROPERTIES PREFIX "" SUFFIX ${PY_SUFFIX})
target_link_libraries(_SPTAG SPTAGLib ${Python_LIBRARIES})
add_custom_command(TARGET _SPTAG POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy ${PROJECT_SOURCE_DIR}/Wrappers/inc/SPTAG.py ${EXECUTABLE_OUTPUT_PATH})
install(TARGETS _SPTAG _SPTAGClient
RUNTIME DESTINATION bin
ARCHIVE DESTINATION lib
LIBRARY DESTINATION lib)
install(FILES ${PROJECT_SOURCE_DIR}/Wrappers/inc/SPTAG.py ${PROJECT_SOURCE_DIR}/Wrappers/inc/SPTAGClient.py DESTINATION bin)
file(GLOB CLIENT_HDR_FILES ${PROJECT_SOURCE_DIR}/Wrappers/inc/ClientInterface.h ${PROJECT_SOURCE_DIR}/AnnService/inc/Socket/*.h ${PROJECT_SOURCE_DIR}/AnnService/inc/Client/*.h)
file(GLOB CLIENT_SRC_FILES ${PROJECT_SOURCE_DIR}/Wrappers/src/ClientInterface.cpp ${PROJECT_SOURCE_DIR}/AnnService/src/Socket/*.cpp ${PROJECT_SOURCE_DIR}/AnnService/src/Client/*.cpp ${PROJECT_SOURCE_DIR}/Wrappers/inc/ClientInterface_pwrap.cpp)
add_library (_SPTAGClient SHARED ${CLIENT_SRC_FILES} ${CLIENT_HDR_FILES})
set_target_properties(_SPTAGClient PROPERTIES PREFIX "" SUFFIX ${PY_SUFFIX})
target_link_libraries(_SPTAGClient SPTAGLib ${Python_LIBRARIES} ${Boost_LIBRARIES})
add_custom_command(TARGET _SPTAGClient POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy ${PROJECT_SOURCE_DIR}/Wrappers/inc/SPTAGClient.py ${EXECUTABLE_OUTPUT_PATH})
install(TARGETS _SPTAG _SPTAGClient
RUNTIME DESTINATION bin
ARCHIVE DESTINATION lib
LIBRARY DESTINATION lib)
# Install the Python proxy modules from Wrappers/inc: that is where the swig
# -o outputs are written and where the POST_BUILD copy steps read SPTAG.py and
# SPTAGClient.py from (presumably swig emits the .py next to its wrapper output).
install(FILES ${PROJECT_SOURCE_DIR}/Wrappers/inc/SPTAG.py ${PROJECT_SOURCE_DIR}/Wrappers/inc/SPTAGClient.py DESTINATION bin)
else()
message (STATUS "Could not find Python.")
message (STATUS "Could not find Python.")
endif()
find_package(JNI)
if (!JNI_FOUND)
if (JNI_FOUND)
include_directories (${JNI_INCLUDE_DIRS})
link_directories (${JNI_LIBRARY_DIRS})
message (STATUS "Found JNI.")
message (STATUS "Include Path: ${JNI_INCLUDE_DIRS}")
message (STATUS "Library Path: ${JNI_LIBRARIES}")
if (WIN32)
set (JAVA_SUFFIX .dll)
else()
set (JAVA_SUFFIX .so)
endif()
execute_process(COMMAND swig -java -c++ -I${PROJECT_SOURCE_DIR}/Wrappers/inc -o ${PROJECT_SOURCE_DIR}/Wrappers/inc/CoreInterface_jwrap.cpp ${PROJECT_SOURCE_DIR}/Wrappers/inc/JavaCore.i)
execute_process(COMMAND swig -java -c++ -I${PROJECT_SOURCE_DIR}/Wrappers/inc -o ${PROJECT_SOURCE_DIR}/Wrappers/inc/ClientInterface_jwrap.cpp ${PROJECT_SOURCE_DIR}/Wrappers/inc/JavaClient.i)
include_directories(${JNI_INCLUDE_DIRS} ${PROJECT_SOURCE_DIR}/AnnService ${PROJECT_SOURCE_DIR}/Wrappers)
if (WIN32)
set (JAVA_SUFFIX .dll)
else()
set (JAVA_SUFFIX .so)
endif()
file(GLOB CORE_HDR_FILES ${PROJECT_SOURCE_DIR}/Wrappers/inc/CoreInterface.h)
file(GLOB CORE_SRC_FILES ${PROJECT_SOURCE_DIR}/Wrappers/src/CoreInterface.cpp ${PROJECT_SOURCE_DIR}/Wrappers/inc/CoreInterface_jwrap.cpp)
add_library (SPTAG SHARED ${CORE_SRC_FILES} ${CORE_HDR_FILES})
set_target_properties(SPTAG PROPERTIES SUFFIX ${JAVA_SUFFIX})
target_link_libraries(SPTAG SPTAGLib ${JNI_LIBRARIES} ${TBB_LIBRARIES})
execute_process(COMMAND swig -java -c++ -I${PROJECT_SOURCE_DIR}/Wrappers/inc -o ${PROJECT_SOURCE_DIR}/Wrappers/inc/CoreInterface_jwrap.cpp ${PROJECT_SOURCE_DIR}/Wrappers/inc/JavaCore.i)
execute_process(COMMAND swig -java -c++ -I${PROJECT_SOURCE_DIR}/Wrappers/inc -o ${PROJECT_SOURCE_DIR}/Wrappers/inc/ClientInterface_jwrap.cpp ${PROJECT_SOURCE_DIR}/Wrappers/inc/JavaClient.i)
file(GLOB CLIENT_HDR_FILES ${PROJECT_SOURCE_DIR}/Wrappers/inc/ClientInterface.h ${PROJECT_SOURCE_DIR}/AnnService/inc/Socket/*.h ${PROJECT_SOURCE_DIR}/AnnService/inc/Client/*.h)
file(GLOB CLIENT_SRC_FILES ${PROJECT_SOURCE_DIR}/Wrappers/src/ClientInterface.cpp ${PROJECT_SOURCE_DIR}/AnnService/src/Socket/*.cpp ${PROJECT_SOURCE_DIR}/AnnService/src/Client/*.cpp ${PROJECT_SOURCE_DIR}/Wrappers/inc/ClientInterface_jwrap.cpp)
add_library (SPTAGClient SHARED ${CLIENT_SRC_FILES} ${CLIENT_HDR_FILES})
set_target_properties(SPTAGClient PROPERTIES SUFFIX ${JAVA_SUFFIX})
target_link_libraries(SPTAGClient SPTAGLib ${JNI_LIBRARIES} ${Boost_LIBRARIES} ${TBB_LIBRARIES})
file(GLOB JAVA_FILES ${PROJECT_SOURCE_DIR}/Wrappers/inc/*.java)
foreach(JAVA_FILE ${JAVA_FILES})
message (STATUS "Add copy post-command for file " ${JAVA_FILE})
add_custom_command(TARGET SPTAGClient POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy ${JAVA_FILE} ${EXECUTABLE_OUTPUT_PATH})
endforeach(JAVA_FILE)
install(TARGETS SPTAG SPTAGClient
RUNTIME DESTINATION bin
ARCHIVE DESTINATION lib
LIBRARY DESTINATION lib)
install(FILES ${PROJECT_SOURCE_DIR}/Wrappers/inc/*.java DESTINATION bin)
include_directories(${JNI_INCLUDE_DIRS} ${PROJECT_SOURCE_DIR}/AnnService ${PROJECT_SOURCE_DIR}/Wrappers)
file(GLOB CORE_HDR_FILES ${PROJECT_SOURCE_DIR}/Wrappers/inc/CoreInterface.h)
file(GLOB CORE_SRC_FILES ${PROJECT_SOURCE_DIR}/Wrappers/src/CoreInterface.cpp ${PROJECT_SOURCE_DIR}/Wrappers/inc/CoreInterface_jwrap.cpp)
add_library (JAVASPTAG SHARED ${CORE_SRC_FILES} ${CORE_HDR_FILES})
set_target_properties(JAVASPTAG PROPERTIES SUFFIX ${JAVA_SUFFIX})
target_link_libraries(JAVASPTAG SPTAGLib ${JNI_LIBRARIES})
file(GLOB CLIENT_HDR_FILES ${PROJECT_SOURCE_DIR}/Wrappers/inc/ClientInterface.h ${PROJECT_SOURCE_DIR}/AnnService/inc/Socket/*.h ${PROJECT_SOURCE_DIR}/AnnService/inc/Client/*.h)
file(GLOB CLIENT_SRC_FILES ${PROJECT_SOURCE_DIR}/Wrappers/src/ClientInterface.cpp ${PROJECT_SOURCE_DIR}/AnnService/src/Socket/*.cpp ${PROJECT_SOURCE_DIR}/AnnService/src/Client/*.cpp ${PROJECT_SOURCE_DIR}/Wrappers/inc/ClientInterface_jwrap.cpp)
add_library (JAVASPTAGClient SHARED ${CLIENT_SRC_FILES} ${CLIENT_HDR_FILES})
set_target_properties(JAVASPTAGClient PROPERTIES SUFFIX ${JAVA_SUFFIX})
target_link_libraries(JAVASPTAGClient SPTAGLib ${JNI_LIBRARIES} ${Boost_LIBRARIES})
file(GLOB JAVA_FILES ${PROJECT_SOURCE_DIR}/Wrappers/inc/*.java)
foreach(JAVA_FILE ${JAVA_FILES})
message (STATUS "Add copy post-command for file " ${JAVA_FILE})
add_custom_command(TARGET JAVASPTAGClient POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy ${JAVA_FILE} ${EXECUTABLE_OUTPUT_PATH})
endforeach(JAVA_FILE)
install(TARGETS JAVASPTAG JAVASPTAGClient
RUNTIME DESTINATION bin
ARCHIVE DESTINATION lib
LIBRARY DESTINATION lib)
install(FILES ${PROJECT_SOURCE_DIR}/Wrappers/inc/*.java DESTINATION bin)
else()
message (STATUS "Could not find JNI.")
endif()
message (STATUS "Could not find JNI.")
endif()
# Locate a C# toolchain and publish the result in DOTNET_FOUND.
#  - Windows: search the .NET Framework directory under %windir% for csc.exe
#    and keep the candidate that sorts last; its version directory is cached
#    in DOTNET_EXECUTABLE_VERSION.
#  - Other platforms: look for a `dotnet` executable with find_program().
if (WIN32)
    # Quote the expansion: if PROJECTNAME_ARCHITECTURE is unset, the unquoted
    # form collapses to `if (MATCHES "x64")`, which is not a valid condition.
    if ("${PROJECTNAME_ARCHITECTURE}" MATCHES "x64")
        set (csharp_dotnet_framework_hints "$ENV{windir}\\Microsoft.NET\\Framework64")
    else()
        set (csharp_dotnet_framework_hints "$ENV{windir}\\Microsoft.NET\\Framework")
    endif()
    # Convert to CMake-style forward slashes so the prefix stripped below has
    # the same form as the paths produced by file(GLOB_RECURSE).
    file(TO_CMAKE_PATH "${csharp_dotnet_framework_hints}" csharp_dotnet_framework_hints)
    file(GLOB_RECURSE csharp_dotnet_executables "${csharp_dotnet_framework_hints}/csc.exe")
    # Sort then reverse so the last entry alphabetically is tried first.
    list(SORT csharp_dotnet_executables)
    list(REVERSE csharp_dotnet_executables)
    foreach (csharp_dotnet_executable ${csharp_dotnet_executables})
        # Only the first candidate is recorded; later iterations are no-ops.
        if (NOT DEFINED DOTNET_FOUND)
            # Reduce ".../<version>/csc.exe" to just "<version>".
            string(REPLACE "${csharp_dotnet_framework_hints}/" "" csharp_dotnet_version_temp "${csharp_dotnet_executable}")
            string(REPLACE "/csc.exe" "" csharp_dotnet_version_temp "${csharp_dotnet_version_temp}")
            set (DOTNET_EXECUTABLE_VERSION "${csharp_dotnet_version_temp}" CACHE STRING "C# .NET compiler version" FORCE)
            set (DOTNET_FOUND "${csharp_dotnet_executable}")
        endif()
    endforeach()
else()
    find_program(DOTNET_FOUND dotnet)
endif()
# Build the SWIG-generated C# wrapper libraries when a C# toolchain was found.
# Defines two shared libraries, CSHARPSPTAG (core) and CSHARPSPTAGClient
# (client), copies the generated *.cs proxy sources next to the binaries after
# the client library is built, and installs both libraries and the proxies.
if (DOTNET_FOUND)
    message (STATUS "Found dotnet.")
    message (STATUS "DOTNET_EXECUTABLE: " ${DOTNET_FOUND})

    if (WIN32)
        set (CSHARP_SUFFIX .dll)
    else()
        set (CSHARP_SUFFIX .so)
    endif()

    # SWIG runs at configure time so that the *_cwrap.cpp sources and the *.cs
    # proxies already exist when the file(GLOB ...) calls below look for them.
    # A failure is reported instead of being silently ignored; configuration
    # continues as before.
    execute_process(
        COMMAND swig -csharp -c++ -I${PROJECT_SOURCE_DIR}/Wrappers/inc -o ${PROJECT_SOURCE_DIR}/Wrappers/inc/CoreInterface_cwrap.cpp ${PROJECT_SOURCE_DIR}/Wrappers/inc/CsharpCore.i
        RESULT_VARIABLE csharp_core_swig_result)
    if (NOT "${csharp_core_swig_result}" STREQUAL "0")
        message (WARNING "swig failed to generate CoreInterface_cwrap.cpp (result: ${csharp_core_swig_result})")
    endif()
    execute_process(
        COMMAND swig -csharp -c++ -I${PROJECT_SOURCE_DIR}/Wrappers/inc -o ${PROJECT_SOURCE_DIR}/Wrappers/inc/ClientInterface_cwrap.cpp ${PROJECT_SOURCE_DIR}/Wrappers/inc/CsharpClient.i
        RESULT_VARIABLE csharp_client_swig_result)
    if (NOT "${csharp_client_swig_result}" STREQUAL "0")
        message (WARNING "swig failed to generate ClientInterface_cwrap.cpp (result: ${csharp_client_swig_result})")
    endif()

    include_directories(${PROJECT_SOURCE_DIR}/AnnService ${PROJECT_SOURCE_DIR}/Wrappers)

    # Core wrapper: hand-written interface plus the SWIG-generated glue.
    file(GLOB CORE_HDR_FILES ${PROJECT_SOURCE_DIR}/Wrappers/inc/CoreInterface.h)
    file(GLOB CORE_SRC_FILES ${PROJECT_SOURCE_DIR}/Wrappers/src/CoreInterface.cpp ${PROJECT_SOURCE_DIR}/Wrappers/inc/CoreInterface_cwrap.cpp)
    add_library (CSHARPSPTAG SHARED ${CORE_SRC_FILES} ${CORE_HDR_FILES})
    set_target_properties(CSHARPSPTAG PROPERTIES SUFFIX ${CSHARP_SUFFIX})
    target_link_libraries(CSHARPSPTAG SPTAGLib)

    # Client wrapper: additionally compiles the Socket and Client sources.
    file(GLOB CLIENT_HDR_FILES ${PROJECT_SOURCE_DIR}/Wrappers/inc/ClientInterface.h ${PROJECT_SOURCE_DIR}/AnnService/inc/Socket/*.h ${PROJECT_SOURCE_DIR}/AnnService/inc/Client/*.h)
    file(GLOB CLIENT_SRC_FILES ${PROJECT_SOURCE_DIR}/Wrappers/src/ClientInterface.cpp ${PROJECT_SOURCE_DIR}/AnnService/src/Socket/*.cpp ${PROJECT_SOURCE_DIR}/AnnService/src/Client/*.cpp ${PROJECT_SOURCE_DIR}/Wrappers/inc/ClientInterface_cwrap.cpp)
    add_library (CSHARPSPTAGClient SHARED ${CLIENT_SRC_FILES} ${CLIENT_HDR_FILES})
    set_target_properties(CSHARPSPTAGClient PROPERTIES SUFFIX ${CSHARP_SUFFIX})
    target_link_libraries(CSHARPSPTAGClient SPTAGLib ${Boost_LIBRARIES})

    # Copy every generated C# proxy to the executable output directory once
    # the client library has been built.
    file(GLOB CSHARP_FILES ${PROJECT_SOURCE_DIR}/Wrappers/inc/*.cs)
    foreach(CSHARP_FILE ${CSHARP_FILES})
        message (STATUS "Add copy post-command for file " ${CSHARP_FILE})
        add_custom_command(TARGET CSHARPSPTAGClient POST_BUILD
            COMMAND ${CMAKE_COMMAND} -E copy "${CSHARP_FILE}" "${EXECUTABLE_OUTPUT_PATH}"
            VERBATIM)
    endforeach()

    install(TARGETS CSHARPSPTAG CSHARPSPTAGClient
        RUNTIME DESTINATION bin
        ARCHIVE DESTINATION lib
        LIBRARY DESTINATION lib)
    # install(FILES) does not expand wildcards, so install the globbed list
    # rather than a literal "*.cs" path.
    if (CSHARP_FILES)
        install(FILES ${CSHARP_FILES} DESTINATION bin)
    endif()
else()
    message (STATUS "Could not find C#.")
endif()

View File

@ -0,0 +1,191 @@
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<!-- Build matrix offered by this project: Debug and Release, each for Win32 and x64. -->
<ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="Debug|Win32">
<Configuration>Debug</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|Win32">
<Configuration>Release</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Debug|x64">
<Configuration>Debug</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|x64">
<Configuration>Release</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
</ItemGroup>
<!-- Project identity: GUID, root namespace, and the VC project / Windows SDK versions it targets. -->
<PropertyGroup Label="Globals">
<VCProjectVersion>15.0</VCProjectVersion>
<ProjectGuid>{363BA3BB-75C4-4CC7-AECB-28C7534B3710}</ProjectGuid>
<RootNamespace>CsharpClient</RootNamespace>
<WindowsTargetPlatformVersion>8.1</WindowsTargetPlatformVersion>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
<Import Project="$(SolutionDir)\AnnService.users.props" />
<!-- Per-configuration settings. Every configuration builds a DLL with the v140 toolset and the multi-byte character set; the Release configurations additionally enable whole-program optimization. -->
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
<ConfigurationType>DynamicLibrary</ConfigurationType>
<UseDebugLibraries>true</UseDebugLibraries>
<PlatformToolset>v140</PlatformToolset>
<CharacterSet>MultiByte</CharacterSet>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
<ConfigurationType>DynamicLibrary</ConfigurationType>
<UseDebugLibraries>false</UseDebugLibraries>
<PlatformToolset>v140</PlatformToolset>
<WholeProgramOptimization>true</WholeProgramOptimization>
<CharacterSet>MultiByte</CharacterSet>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
<ConfigurationType>DynamicLibrary</ConfigurationType>
<UseDebugLibraries>true</UseDebugLibraries>
<PlatformToolset>v140</PlatformToolset>
<CharacterSet>MultiByte</CharacterSet>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
<ConfigurationType>DynamicLibrary</ConfigurationType>
<UseDebugLibraries>false</UseDebugLibraries>
<PlatformToolset>v140</PlatformToolset>
<WholeProgramOptimization>true</WholeProgramOptimization>
<CharacterSet>MultiByte</CharacterSet>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<!-- Extension and shared import groups are intentionally empty. -->
<ImportGroup Label="ExtensionSettings">
</ImportGroup>
<ImportGroup Label="Shared">
</ImportGroup>
<!-- For each configuration, pull in the per-user platform property sheet, but only when that file exists. -->
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<!-- Output naming and directories, shared by all configurations. The binary is CSHARPSPTAGClient.dll; intermediates (including the SWIG output) go under obj\<Platform>_<Configuration>\<ProjectName>\. -->
<!-- NOTE(review): OutAppDir and OutLibDir are not defined in this file; presumably they come from AnnService.users.props. Confirm. -->
<PropertyGroup>
<TargetName>CSHARPSPTAGClient</TargetName>
<TargetExt>.dll</TargetExt>
<IntDir>$(SolutionDir)obj\$(Platform)_$(Configuration)\$(ProjectName)\</IntDir>
<IncludePath>$(ProjectDir);$(SolutionDir)AnnService\;$(IncludePath)</IncludePath>
<LibraryPath>$(OutLibDir);$(LibraryPath)</LibraryPath>
<OutDir>$(OutAppDir)</OutDir>
</PropertyGroup>
<!-- Incremental linking is switched off for Debug|x64 only. -->
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<LinkIncremental>false</LinkIncremental>
</PropertyGroup>
<!-- All configurations link against CoreLibrary.lib and SocketLib.lib. -->
<ItemDefinitionGroup>
<Link>
<AdditionalDependencies>CoreLibrary.lib;SocketLib.lib;%(AdditionalDependencies)</AdditionalDependencies>
</Link>
</ItemDefinitionGroup>
<!-- Placeholder: passes the inherited include directories through unchanged. -->
<ItemDefinitionGroup>
<ClCompile>
<AdditionalIncludeDirectories>%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
</ClCompile>
</ItemDefinitionGroup>
<!-- Release|x64: optimized build with COMDAT folding and unreferenced-code removal at link time. -->
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<Optimization>MaxSpeed</Optimization>
<FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions>
<SDLCheck>true</SDLCheck>
<ConformanceMode>true</ConformanceMode>
<PreprocessorDefinitions>_WINDLL;_SCL_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
</ClCompile>
<Link>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
</Link>
</ItemDefinitionGroup>
<!-- Debug|Win32: unoptimized build. -->
<!-- NOTE(review): unlike the x64 configurations, the Win32 ones do not define _WINDLL or _SCL_SECURE_NO_WARNINGS. Confirm this is intended. -->
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<Optimization>Disabled</Optimization>
<SDLCheck>true</SDLCheck>
<ConformanceMode>true</ConformanceMode>
</ClCompile>
</ItemDefinitionGroup>
<!-- Debug|x64: unoptimized build with Control Flow Guard enabled in both the compiler and the linker, and PDB debug information. -->
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<Optimization>Disabled</Optimization>
<SDLCheck>true</SDLCheck>
<ConformanceMode>true</ConformanceMode>
<PreprocessorDefinitions>_WINDLL;_SCL_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<ControlFlowGuard>Guard</ControlFlowGuard>
<DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
</ClCompile>
<Link>
<AdditionalOptions>/guard:cf %(AdditionalOptions)</AdditionalOptions>
</Link>
</ItemDefinitionGroup>
<!-- Release|Win32: optimized build with COMDAT folding and unreferenced-code removal at link time. -->
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<Optimization>MaxSpeed</Optimization>
<FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions>
<SDLCheck>true</SDLCheck>
<ConformanceMode>true</ConformanceMode>
</ClCompile>
<Link>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
</Link>
</ItemDefinitionGroup>
<!-- Public headers of the client wrapper. -->
<ItemGroup>
<ClInclude Include="inc\ClientInterface.h" />
<ClInclude Include="inc\TransferDataType.h" />
</ItemGroup>
<!-- Compiled sources: the hand-written wrapper plus the SWIG-generated glue, which lives in the intermediate directory. -->
<ItemGroup>
<ClCompile Include="src\ClientInterface.cpp" />
<ClCompile Include="$(IntDir)ClientInterface_cwrap.cpp" />
</ItemGroup>
<!-- SWIG interface file for the client; listed as a non-compiled item and explicitly not excluded from any configuration. -->
<ItemGroup>
<None Include="inc\CsharpClient.i">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">false</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">false</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">false</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">false</ExcludedFromBuild>
</None>
</ItemGroup>
<!-- Other non-compiled items: the shared SWIG interface file and the NuGet package list. -->
<ItemGroup>
<None Include="inc\CsharpCommon.i" />
<None Include="packages.config" />
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<!-- Boost 1.67 NuGet targets (headers plus the system, date_time and regex libraries built for vc140). Each import is skipped when its file is absent; the EnsureNuGetPackageBuildImports target reports such omissions as errors. -->
<ImportGroup Label="ExtensionTargets">
<Import Project="..\packages\boost.1.67.0.0\build\boost.targets" Condition="Exists('..\packages\boost.1.67.0.0\build\boost.targets')" />
<Import Project="..\packages\boost_system-vc140.1.67.0.0\build\boost_system-vc140.targets" Condition="Exists('..\packages\boost_system-vc140.1.67.0.0\build\boost_system-vc140.targets')" />
<Import Project="..\packages\boost_date_time-vc140.1.67.0.0\build\boost_date_time-vc140.targets" Condition="Exists('..\packages\boost_date_time-vc140.1.67.0.0\build\boost_date_time-vc140.targets')" />
<Import Project="..\packages\boost_regex-vc140.1.67.0.0\build\boost_regex-vc140.targets" Condition="Exists('..\packages\boost_regex-vc140.1.67.0.0\build\boost_regex-vc140.targets')" />
</ImportGroup>
<!-- Runs SWIG before PrepareForBuild: creates the intermediate directory, then generates ClientInterface_cwrap.cpp and the C# proxy sources into it. -->
<Target Name="BeforeBuild" BeforeTargets="PrepareForBuild">
  <MakeDir Directories="$(IntDir)" />
  <!-- $(IntDir) already ends with a backslash, so the output file name is appended directly, matching the ClCompile include for the generated source. -->
  <Exec Command="$(SolutionDir)packages\swigwin.3.0.9\tools\swigwin-3.0.9\swig.exe -csharp -c++ -I$(IntDir) -outdir $(IntDir) -o $(IntDir)ClientInterface_cwrap.cpp inc\CsharpClient.i" />
</Target>
<!-- Copies the generated C# proxy sources to the output directory once SWIG has run. -->
<Target Name="CopyFiles" AfterTargets="BeforeBuild">
  <!-- The wildcard is declared inside the target so it is expanded when the target executes. A project-level item is expanded during evaluation, before SWIG has produced any .cs file, which leaves the copy list empty on a clean build. -->
  <ItemGroup>
    <MySourceFiles Include="$(IntDir)*.cs" />
  </ItemGroup>
  <Copy SourceFiles="@(MySourceFiles)" DestinationFolder="$(OutDir)" />
</Target>
<!-- Runs before PrepareForBuild and fails the build with a descriptive message when swig.exe or any of the Boost NuGet target files is missing, naming the missing file. -->
<Target Name="EnsureNuGetPackageBuildImports" BeforeTargets="PrepareForBuild">
<PropertyGroup>
<ErrorText>This project references NuGet package(s) that are missing on this computer. Use NuGet Package Restore to download them. For more information, see http://go.microsoft.com/fwlink/?LinkID=322105. The missing file is {0}.</ErrorText>
</PropertyGroup>
<Error Condition="!Exists('..\packages\swigwin.3.0.9\tools\swigwin-3.0.9\swig.exe')" Text="$([System.String]::Format('$(ErrorText)', '..\packages\swigwin.3.0.9\tools\swigwin-3.0.9\swig.exe'))" />
<Error Condition="!Exists('..\packages\boost.1.67.0.0\build\boost.targets')" Text="$([System.String]::Format('$(ErrorText)', '..\packages\boost.1.67.0.0\build\boost.targets'))" />
<Error Condition="!Exists('..\packages\boost_system-vc140.1.67.0.0\build\boost_system-vc140.targets')" Text="$([System.String]::Format('$(ErrorText)', '..\packages\boost_system-vc140.1.67.0.0\build\boost_system-vc140.targets'))" />
<Error Condition="!Exists('..\packages\boost_date_time-vc140.1.67.0.0\build\boost_date_time-vc140.targets')" Text="$([System.String]::Format('$(ErrorText)', '..\packages\boost_date_time-vc140.1.67.0.0\build\boost_date_time-vc140.targets'))" />
<Error Condition="!Exists('..\packages\boost_regex-vc140.1.67.0.0\build\boost_regex-vc140.targets')" Text="$([System.String]::Format('$(ErrorText)', '..\packages\boost_regex-vc140.1.67.0.0\build\boost_regex-vc140.targets'))" />
</Target>
</Project>

View File

@ -0,0 +1,41 @@
<?xml version="1.0" encoding="utf-8"?>
<!-- Filter definitions for the C# client wrapper project: declares three filters and assigns every project item to one of them. -->
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<!-- Filter declarations, each with its identifier and the file extensions it is associated with. -->
<ItemGroup>
<Filter Include="Source Files">
<UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
<Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
</Filter>
<Filter Include="Header Files">
<UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
<Extensions>h;hh;hpp;hxx;hm;inl;inc;xsd</Extensions>
</Filter>
<Filter Include="Resource Files">
<UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
<Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms</Extensions>
</Filter>
</ItemGroup>
<!-- Headers. -->
<ItemGroup>
<ClInclude Include="inc\ClientInterface.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="inc\TransferDataType.h">
<Filter>Header Files</Filter>
</ClInclude>
</ItemGroup>
<!-- Compiled sources, including the SWIG-generated wrapper in the intermediate directory. -->
<ItemGroup>
<ClCompile Include="src\ClientInterface.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="$(IntDir)ClientInterface_cwrap.cpp">
<Filter>Source Files</Filter>
</ClCompile>
</ItemGroup>
<!-- SWIG interface files are grouped under Resource Files. -->
<ItemGroup>
<None Include="inc\CsharpClient.i">
<Filter>Resource Files</Filter>
</None>
<None Include="inc\CsharpCommon.i">
<Filter>Resource Files</Filter>
</None>
</ItemGroup>
</Project>

View File

@ -0,0 +1,134 @@
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<!-- Build matrix offered by this project: Debug and Release, each for Win32 and x64. -->
<ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="Debug|Win32">
<Configuration>Debug</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|Win32">
<Configuration>Release</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Debug|x64">
<Configuration>Debug</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|x64">
<Configuration>Release</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
</ItemGroup>
<!-- Project identity: GUID, root namespace, and the VC project / Windows SDK versions it targets. -->
<PropertyGroup Label="Globals">
<VCProjectVersion>15.0</VCProjectVersion>
<ProjectGuid>{1896C009-AD46-4A70-B83C-4652A7F37503}</ProjectGuid>
<RootNamespace>CsharpCore</RootNamespace>
<WindowsTargetPlatformVersion>8.1</WindowsTargetPlatformVersion>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
<Import Project="$(SolutionDir)\AnnService.users.props" />
<!-- Win32 configurations. The project emits CSHARPSPTAG.dll (see TargetName and TargetExt), so it is built as a DynamicLibrary, consistent with the x64 configurations, rather than as an Application. -->
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
  <ConfigurationType>DynamicLibrary</ConfigurationType>
  <UseDebugLibraries>true</UseDebugLibraries>
  <PlatformToolset>v140</PlatformToolset>
  <CharacterSet>MultiByte</CharacterSet>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
  <ConfigurationType>DynamicLibrary</ConfigurationType>
  <UseDebugLibraries>false</UseDebugLibraries>
  <PlatformToolset>v140</PlatformToolset>
  <WholeProgramOptimization>true</WholeProgramOptimization>
  <CharacterSet>MultiByte</CharacterSet>
</PropertyGroup>
<!-- x64 configurations: DLL output, v140 toolset, multi-byte character set; Release additionally enables whole-program optimization. -->
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
<ConfigurationType>DynamicLibrary</ConfigurationType>
<UseDebugLibraries>true</UseDebugLibraries>
<PlatformToolset>v140</PlatformToolset>
<CharacterSet>MultiByte</CharacterSet>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
<ConfigurationType>DynamicLibrary</ConfigurationType>
<UseDebugLibraries>false</UseDebugLibraries>
<PlatformToolset>v140</PlatformToolset>
<WholeProgramOptimization>true</WholeProgramOptimization>
<CharacterSet>MultiByte</CharacterSet>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<!-- Extension and shared import groups are intentionally empty. -->
<ImportGroup Label="ExtensionSettings">
</ImportGroup>
<ImportGroup Label="Shared">
</ImportGroup>
<!-- For each configuration, pull in the per-user platform property sheet, but only when that file exists. -->
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<!-- Output naming and directories, shared by all configurations. The binary is CSHARPSPTAG.dll; intermediates (including the SWIG output) go under obj\<Platform>_<Configuration>\<ProjectName>\. -->
<!-- NOTE(review): OutAppDir and OutLibDir are not defined in this file; presumably they come from AnnService.users.props. Confirm. -->
<PropertyGroup>
<TargetName>CSHARPSPTAG</TargetName>
<TargetExt>.dll</TargetExt>
<IntDir>$(SolutionDir)obj\$(Platform)_$(Configuration)\$(ProjectName)\</IntDir>
<IncludePath>$(ProjectDir);$(SolutionDir)AnnService\;$(IncludePath)</IncludePath>
<LibraryPath>$(OutLibDir);$(LibraryPath)</LibraryPath>
<OutDir>$(OutAppDir)</OutDir>
</PropertyGroup>
<!-- Incremental linking is switched off for Debug|x64 only. -->
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<LinkIncremental>false</LinkIncremental>
</PropertyGroup>
<!-- All configurations link against CoreLibrary.lib. -->
<ItemDefinitionGroup>
<Link>
<AdditionalDependencies>CoreLibrary.lib;%(AdditionalDependencies)</AdditionalDependencies>
</Link>
</ItemDefinitionGroup>
<!-- Compiler and linker settings that apply only to the x64 configurations: both define _WINDLL and _SCL_SECURE_NO_WARNINGS; Debug|x64 additionally enables Control Flow Guard (compiler and linker) and PDB debug information. -->
<ItemDefinitionGroup>
<ClCompile>
<PreprocessorDefinitions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">_WINDLL;_SCL_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<AdditionalIncludeDirectories>%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<ControlFlowGuard Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Guard</ControlFlowGuard>
<DebugInformationFormat Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">ProgramDatabase</DebugInformationFormat>
<PreprocessorDefinitions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">_WINDLL;_SCL_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
</ClCompile>
<Link>
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">/guard:cf %(AdditionalOptions)</AdditionalOptions>
</Link>
</ItemDefinitionGroup>
<!-- Public headers of the core wrapper. -->
<ItemGroup>
<ClInclude Include="inc\CoreInterface.h" />
<ClInclude Include="inc\TransferDataType.h" />
</ItemGroup>
<!-- Compiled sources: the hand-written wrapper plus the SWIG-generated glue, which lives in the intermediate directory. -->
<ItemGroup>
<ClCompile Include="src\CoreInterface.cpp" />
<ClCompile Include="$(IntDir)CoreInterface_cwrap.cpp" />
</ItemGroup>
<!-- SWIG interface files, listed as non-compiled items. -->
<ItemGroup>
<None Include="inc\CsharpCommon.i" />
<None Include="inc\CsharpCore.i" />
</ItemGroup>
<!-- NuGet package list. -->
<ItemGroup>
<None Include="packages.config" />
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
</ImportGroup>
<!-- Runs SWIG before PrepareForBuild: creates the intermediate directory, then generates CoreInterface_cwrap.cpp and the C# proxy sources into it. -->
<Target Name="BeforeBuild" BeforeTargets="PrepareForBuild">
  <MakeDir Directories="$(IntDir)" />
  <Exec Command="$(SolutionDir)packages\swigwin.3.0.9\tools\swigwin-3.0.9\swig.exe -csharp -c++ -I$(IntDir) -outdir $(IntDir) -o $(IntDir)CoreInterface_cwrap.cpp inc\CsharpCore.i" />
</Target>
<!-- Copies the generated C# proxy sources to the output directory once SWIG has run. -->
<Target Name="CopyFiles" AfterTargets="BeforeBuild">
  <!-- The wildcard is declared inside the target so it is expanded when the target executes. A project-level item is expanded during evaluation, before SWIG has produced any .cs file, which leaves the copy list empty on a clean build. -->
  <ItemGroup>
    <MySourceFiles Include="$(IntDir)*.cs" />
  </ItemGroup>
  <Copy SourceFiles="@(MySourceFiles)" DestinationFolder="$(OutDir)" />
</Target>
<!-- Runs before PrepareForBuild and fails the build with a descriptive message when the SWIG NuGet package (swig.exe) is missing. -->
<Target Name="EnsureNuGetPackageBuildImports" BeforeTargets="PrepareForBuild">
<PropertyGroup>
<ErrorText>This project references NuGet package(s) that are missing on this computer. Use NuGet Package Restore to download them. For more information, see http://go.microsoft.com/fwlink/?LinkID=322105. The missing file is {0}.</ErrorText>
</PropertyGroup>
<Error Condition="!Exists('..\packages\swigwin.3.0.9\tools\swigwin-3.0.9\swig.exe')" Text="$([System.String]::Format('$(ErrorText)', '..\packages\swigwin.3.0.9\tools\swigwin-3.0.9\swig.exe'))" />
</Target>
</Project>

View File

@ -0,0 +1,40 @@
<?xml version="1.0" encoding="utf-8"?>
<!-- Filter definitions for the C# core wrapper project: declares three filters and assigns every project item to one of them. -->
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<!-- Filter declarations. The Resources filter has an identifier but no extension list. -->
<ItemGroup>
<Filter Include="Source Files">
<UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
<Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
</Filter>
<Filter Include="Header Files">
<UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
<Extensions>h;hh;hpp;hxx;hm;inl;inc;xsd</Extensions>
</Filter>
<Filter Include="Resources">
<UniqueIdentifier>{ba4289c4-f872-4dbc-a57f-7b415614afb3}</UniqueIdentifier>
</Filter>
</ItemGroup>
<!-- Headers. -->
<ItemGroup>
<ClInclude Include="inc\CoreInterface.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="inc\TransferDataType.h">
<Filter>Header Files</Filter>
</ClInclude>
</ItemGroup>
<!-- Compiled sources, including the SWIG-generated wrapper in the intermediate directory. -->
<ItemGroup>
<ClCompile Include="src\CoreInterface.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="$(IntDir)CoreInterface_cwrap.cpp">
<Filter>Source Files</Filter>
</ClCompile>
</ItemGroup>
<!-- SWIG interface files are grouped under Resources. -->
<ItemGroup>
<None Include="inc\CsharpCore.i">
<Filter>Resources</Filter>
</None>
<None Include="inc\CsharpCommon.i">
<Filter>Resources</Filter>
</None>
</ItemGroup>
</Project>

View File

@ -70,7 +70,7 @@
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<PropertyGroup>
<TargetName>SPTAGClient</TargetName>
<TargetName>JAVASPTAGClient</TargetName>
<TargetExt>.dll</TargetExt>
<IntDir>$(SolutionDir)obj\$(Platform)_$(Configuration)\$(ProjectName)\</IntDir>
<IncludePath>$(ProjectDir);$(SolutionDir)AnnService\;$(IncludePath)</IncludePath>
@ -158,6 +158,7 @@
</None>
</ItemGroup>
<ItemGroup>
<None Include="inc\JavaCommon.i" />
<None Include="packages.config" />
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />

View File

@ -34,5 +34,8 @@
<None Include="inc\JavaClient.i">
<Filter>Resource Files</Filter>
</None>
<None Include="inc\JavaCommon.i">
<Filter>Resource Files</Filter>
</None>
</ItemGroup>
</Project>

View File

@ -70,7 +70,7 @@
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<PropertyGroup>
<TargetName>SPTAG</TargetName>
<TargetName>JAVASPTAG</TargetName>
<TargetExt>.dll</TargetExt>
<IntDir>$(SolutionDir)obj\$(Platform)_$(Configuration)\$(ProjectName)\</IntDir>
<IncludePath>$(ProjectDir);$(SolutionDir)AnnService\;$(IncludePath)</IncludePath>
@ -106,6 +106,7 @@
<ClCompile Include="$(IntDir)CoreInterface_jwrap.cpp" />
</ItemGroup>
<ItemGroup>
<None Include="inc\JavaCommon.i" />
<None Include="inc\JavaCore.i" />
</ItemGroup>
<ItemGroup>
@ -113,8 +114,6 @@
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
<Import Project="..\packages\tbb_oss.redist.9.107.0.0\build\native\tbb_oss.redist.targets" Condition="Exists('..\packages\tbb_oss.redist.9.107.0.0\build\native\tbb_oss.redist.targets')" />
<Import Project="..\packages\tbb_oss.9.107.0.0\build\native\tbb_oss.targets" Condition="Exists('..\packages\tbb_oss.9.107.0.0\build\native\tbb_oss.targets')" />
</ImportGroup>
<Target Name="BeforeBuild" BeforeTargets="PrepareForBuild">
<MakeDir Directories="$(IntDir)" />
@ -131,7 +130,5 @@
<ErrorText>This project references NuGet package(s) that are missing on this computer. Use NuGet Package Restore to download them. For more information, see http://go.microsoft.com/fwlink/?LinkID=322105. The missing file is {0}.</ErrorText>
</PropertyGroup>
<Error Condition="!Exists('..\packages\swigwin.3.0.9\tools\swigwin-3.0.9\swig.exe')" Text="$([System.String]::Format('$(ErrorText)', '..\packages\swigwin.3.0.9\tools\swigwin-3.0.9\swig.exe'))" />
<Error Condition="!Exists('..\packages\tbb_oss.redist.9.107.0.0\build\native\tbb_oss.redist.targets')" Text="$([System.String]::Format('$(ErrorText)', '..\packages\tbb_oss.redist.9.107.0.0\build\native\tbb_oss.redist.targets'))" />
<Error Condition="!Exists('..\packages\tbb_oss.9.107.0.0\build\native\tbb_oss.targets')" Text="$([System.String]::Format('$(ErrorText)', '..\packages\tbb_oss.9.107.0.0\build\native\tbb_oss.targets'))" />
</Target>
</Project>

View File

@ -30,9 +30,11 @@
</ClCompile>
</ItemGroup>
<ItemGroup>
<None Include="packages.config" />
<None Include="inc\JavaCore.i">
<Filter>Resources</Filter>
</None>
<None Include="inc\JavaCommon.i">
<Filter>Resources</Filter>
</None>
</ItemGroup>
</Project>

Some files were not shown because too many files have changed in this diff Show More