add thrift service

Former-commit-id: a26d0befbb2d06095c003edc3d87e79ca4911b41
This commit is contained in:
groot 2019-04-16 12:10:27 +08:00
commit 397d1cfcd2
14 changed files with 449 additions and 3 deletions

View File

@ -8,6 +8,7 @@ aux_source_directory(cache cache_files)
aux_source_directory(config config_files)
aux_source_directory(server server_files)
aux_source_directory(utils utils_files)
aux_source_directory(wrapper wrapper_files)
set(service_files
thrift/gen-cpp/VecService.cpp
@ -17,7 +18,10 @@ set(service_files
set(vecwise_engine_src
${CMAKE_CURRENT_SOURCE_DIR}/main.cpp
${cache_files}
)
${wrapper_files})
# CUDA headers: wrapper/Index.h includes faiss/gpu headers.
include_directories(/usr/local/cuda/include)
# find_library() stores the path of ONE library per call; listing two names only
# offers alternatives for the same lookup. Resolve cudart and cublas separately
# and expose both through the cuda_library variable used when linking.
find_library(cudart_library NAMES cudart HINTS /usr/local/cuda/lib64)
find_library(cublas_library NAMES cublas HINTS /usr/local/cuda/lib64)
set(cuda_library ${cudart_library} ${cublas_library})
add_library(vecwise_engine STATIC ${vecwise_engine_src})
@ -26,6 +30,7 @@ add_executable(vecwise_engine_server
${server_files}
${utils_files}
${service_files}
${wrapper_files}
${VECWISE_THIRD_PARTY_BUILD}/include/easylogging++.cc
)
@ -36,5 +41,6 @@ set(dependency_libs
boost_filesystem
thrift
pthread
faiss
)
target_link_libraries(vecwise_engine_server ${dependency_libs})
target_link_libraries(vecwise_engine_server ${dependency_libs} ${cuda_library})

View File

@ -29,6 +29,9 @@ public:
virtual Status add_vectors(const std::string& group_id_,
size_t n, const float* vectors, IDNumbers& vector_ids_) = 0;
// Search interface: receives a group id, the counts k and nq, a float buffer of
// query vectors and an output QueryResults container, and returns a Status.
// NOTE(review): presumably "top-k ids for each of nq queries" - the semantics are
// not defined in the visible code; confirm against the implementation.
virtual Status search(const std::string& group_id, size_t k, size_t nq,
const float* vectors, QueryResults& results) = 0;
DB() = default;
DB(const DB&) = delete;
DB& operator=(const DB&) = delete;

View File

@ -52,6 +52,12 @@ Status DBImpl::add_vectors(const std::string& group_id_,
}
}
// Placeholder implementation: ignores every argument, leaves `results`
// untouched and always returns Status::OK().
Status DBImpl::search(const std::string& group_id, size_t k, size_t nq,
const float* vectors, QueryResults& results) {
// PXU TODO: implement the actual search; callers currently get OK with no results written.
return Status::OK();
}
void DBImpl::start_timer_task(int interval_) {
std::thread bg_task(&DBImpl::background_timer_task, this, interval_);
bg_task.detach();

View File

@ -31,6 +31,9 @@ public:
virtual Status add_vectors(const std::string& group_id_,
size_t n, const float* vectors, IDNumbers& vector_ids_) override;
// Override of DB::search. NOTE(review): the definition visible in this change is
// a stub that returns Status::OK() without filling `results`.
virtual Status search(const std::string& group_id, size_t k, size_t nq,
const float* vectors, QueryResults& results) override;
virtual ~DBImpl();
private:

View File

@ -10,6 +10,9 @@ typedef long IDNumber;
typedef IDNumber* IDNumberPtr;
typedef std::vector<IDNumber> IDNumbers;
// A list of vector ids. NOTE(review): presumably the hits of a single query - confirm.
typedef std::vector<IDNumber> QueryResult;
// A list of QueryResult. NOTE(review): presumably one entry per query vector - confirm.
typedef std::vector<QueryResult> QueryResults;
} // namespace engine
} // namespace vecwise

67
cpp/src/wrapper/Index.cpp Normal file
View File

@ -0,0 +1,67 @@
////////////////////////////////////////////////////////////////////////////////
// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved
// Unauthorized copying of this file, via any medium is strictly prohibited.
// Proprietary and confidential.
////////////////////////////////////////////////////////////////////////////////
#ifdef CUDA_VERSION
#include "faiss/gpu/GpuAutoTune.h"
#include "faiss/gpu/StandardGpuResources.h"
#include "faiss/gpu/utils/DeviceUtils.h"
#endif
#include "Index.h"
namespace zilliz {
namespace vecwise {
namespace engine {
using std::string;
using std::unordered_map;
using std::vector;
/**
 * Wraps an existing faiss index and snapshots its dimension and vector count.
 *
 * @param raw_index index to wrap; it is dereferenced here without a null check,
 *                  so callers must pass a non-null pointer.
 */
Index::Index(const std::shared_ptr<faiss::Index> &raw_index)
    : dim(raw_index->d),
      ntotal(raw_index->ntotal),
      store_on_gpu(false),
      index_(raw_index) {
}
/**
 * Resets the wrapped index and re-reads its vector count into ntotal.
 *
 * @return true on success, false if the wrapped index threw a std::exception.
 */
bool Index::reset() {
    bool succeeded = true;
    try {
        index_->reset();
        ntotal = index_->ntotal;
    } catch (std::exception &e) {
        // LOG(ERROR) << e.what();
        succeeded = false;
    }
    return succeeded;
}
/**
 * Adds n vectors to the wrapped index under caller-supplied ids.
 *
 * @param n     number of vectors to add
 * @param xdata vector data (assumes n * dim floats - TODO confirm with callers)
 * @param xids  ids to store for the vectors (assumes n entries - TODO confirm)
 * @return true on success, false if the wrapped index threw a std::exception.
 */
bool Index::add_with_ids(idx_t n, const float *xdata, const long *xids) {
    try {
        index_->add_with_ids(n, xdata, xids);
        // Re-read the count from the wrapped index (as the constructor and
        // reset() do) instead of incrementing a private copy, so the cached
        // value cannot drift from what the index actually holds.
        ntotal = index_->ntotal;
    } catch (const std::exception &e) {
        // LOG(ERROR) << e.what();
        return false;
    }
    return true;
}
/**
 * Forwards a k-nearest-neighbor query to the wrapped index.
 *
 * @param n         number of query vectors
 * @param data      query vectors
 * @param k         number of neighbors requested per query
 * @param distances output buffer for distances
 * @param labels    output buffer for neighbor ids
 * @return true on success, false if the wrapped index threw a std::exception.
 */
bool Index::search(idx_t n, const float *data, idx_t k, float *distances, long *labels) const {
    bool succeeded = true;
    try {
        index_->search(n, data, k, distances, labels);
    } catch (std::exception &e) {
        // LOG(ERROR) << e.what();
        succeeded = false;
    }
    return succeeded;
}
}
}
}

82
cpp/src/wrapper/Index.h Normal file
View File

@ -0,0 +1,82 @@
////////////////////////////////////////////////////////////////////////////////
// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved
// Unauthorized copying of this file, via any medium is strictly prohibited.
// Proprietary and confidential.
////////////////////////////////////////////////////////////////////////////////
#pragma once
#include <vector>
#include <string>
#include <unordered_map>
#include <memory>
#include <fstream>
#include "faiss/AutoTune.h"
#include "faiss/AuxIndexStructures.h"
#include "faiss/gpu/GpuAutoTune.h"
#include "faiss/index_io.h"
#include "Operand.h"
namespace zilliz {
namespace vecwise {
namespace engine {
/**
 * @brief Wrapper around a shared faiss::Index whose operations report success
 *        or failure through a bool return value.
 */
class Index {
public:
    /// Integer type used in the public signatures for counts and `k`.
    using idx_t = long;

    int dim;              ///< vector dimension, copied from the wrapped index on construction
    idx_t ntotal;         ///< number of indexed vectors as tracked by this wrapper
    bool store_on_gpu;    ///< set to false by the constructor

    /**
     * @brief Wraps an existing faiss index.
     *
     * @param raw_index index to wrap; must not be null
     */
    explicit Index(const std::shared_ptr<faiss::Index> &raw_index);

    /// The class has virtual members, so destruction through a base pointer must be well-defined.
    virtual ~Index() = default;

    /**
     * @brief Resets the wrapped index and refreshes ntotal from it.
     *
     * @return false if the wrapped index threw a std::exception
     */
    virtual bool reset();

    /**
     * @brief Same as add, but stores xids instead of sequential ids.
     *
     * @param n     number of vectors to add
     * @param xdata input matrix, size n * d
     * @param xids  ids to store for the vectors, size n
     * @return false if the wrapped index threw a std::exception
     */
    virtual bool add_with_ids(idx_t n, const float *xdata, const long *xids);

    /**
     * @brief For each query vector, find its k nearest neighbors in the database.
     *
     * @param n         number of queries
     * @param data      query vectors
     * @param k         number of nearest neighbors to return per query
     * @param distances output: distances of the top k neighbors
     * @param labels    output: ids of the top k neighbors
     * @return false if the wrapped index threw a std::exception
     */
    virtual bool search(idx_t n, const float *data, idx_t k, float *distances, long *labels) const;

//    virtual bool remove_ids(const faiss::IDSelector &sel, long &nremove, long &location);
//    virtual bool remove_ids_range(const faiss::IDSelector &sel, long &nremove);
//    virtual bool index_display();

private:
    std::shared_ptr<faiss::Index> index_ = nullptr;
//    std::vector<faiss::gpu::GpuResources *> res_;
//    std::vector<int> devs_;
//    bool usegpu = true;
//    int ngpus = 0;
//    faiss::gpu::GpuMultipleClonerOptions *options = new faiss::gpu::GpuMultipleClonerOptions();
};
/// Shared-ownership handle to an Index.
using Index_ptr = std::shared_ptr<Index>;
/// NOTE(review): presumably persists `index` to `file_name`; no definition is
/// visible in this change - confirm one exists before calling.
extern void write_index(const Index_ptr &index, const std::string &file_name);
/// NOTE(review): presumably loads an index from `file_name`; no definition is
/// visible in this change - confirm one exists before calling.
extern Index_ptr read_index(const std::string &file_name);
}
}
}

View File

@ -0,0 +1,51 @@
////////////////////////////////////////////////////////////////////////////////
// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved
// Unauthorized copying of this file, via any medium is strictly prohibited.
// Proprietary and confidential.
////////////////////////////////////////////////////////////////////////////////
#include "mutex"
#include "IndexBuilder.h"
namespace zilliz {
namespace vecwise {
namespace engine {
using std::vector;
// File-local mutex held by build_all while it trains and fills an index.
// todo(linxj): use ResourceMgr instead
static std::mutex cpu_resource;
/**
 * Stores the build parameters used by later build calls.
 *
 * @param opd shared handle to the build parameters; kept as-is (no copy of the Operand).
 */
IndexBuilder::IndexBuilder(const Operand_ptr &opd)
    : opd_(opd) {
}
/**
 * Creates a faiss index from the stored Operand (dimension and factory string),
 * trains it when the index reports it is untrained, and adds the base vectors
 * under the supplied ids.
 *
 * @param nb  number of base vectors
 * @param xb  base vectors (assumes at least nb * d floats - TODO confirm with callers)
 * @param ids ids for the base vectors (assumes at least nb entries - TODO confirm)
 * @param nt  number of training vectors; when 0 the base vectors are used for training
 * @param xt  training vectors; when empty the base vectors are used for training
 * @return a new Index wrapping the built faiss index
 */
Index_ptr IndexBuilder::build_all(const long &nb, const vector<float> &xb,
                                  const vector<long> &ids,
                                  const long &nt, const vector<float> &xt) {
    std::shared_ptr<faiss::Index> index(faiss::index_factory(opd_->d, opd_->index_type.c_str()));

    {
        // currently only cpu resources are used.
        std::lock_guard<std::mutex> lk(cpu_resource);

        if (!index->is_trained) {
            // Fall back to the base vectors when no separate training set was given.
            if (nt == 0 || xt.empty()) {
                index->train(nb, xb.data());
            } else {
                index->train(nt, xt.data());
            }
        }

        // Insert every vector exactly once, under its caller-supplied id. A
        // preceding plain add() would store the same data a second time under
        // sequential ids and double the size of the index.
        index->add_with_ids(nb, xb.data(), ids.data()); // todo(linxj): support add_with_idmap
    }

    return std::make_shared<Index>(index);
}
// Creates an IndexBuilder for the given build parameters.
// Be Factory pattern later
IndexBuilderPtr GetIndexBuilder(const Operand_ptr &opd) {
    IndexBuilderPtr builder = std::make_shared<IndexBuilder>(opd);
    return builder;
}
}
}
}

View File

@ -0,0 +1,51 @@
////////////////////////////////////////////////////////////////////////////////
// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved
// Unauthorized copying of this file, via any medium is strictly prohibited.
// Proprietary and confidential.
////////////////////////////////////////////////////////////////////////////////
#pragma once
#include "faiss/Index.h"
#include "Operand.h"
#include "Index.h"
namespace zilliz {
namespace vecwise {
namespace engine {
// Builds Index objects from the parameters held in an Operand.
class IndexBuilder {
public:
// Stores the build parameters for later use.
explicit IndexBuilder(const Operand_ptr &opd);
// Creates, trains (if needed) and fills an index in one call.
//   nb / xb / ids: number of base vectors, their data and their ids
//   nt / xt:       optional training set; by default nt is 0 and xt is empty
Index_ptr build_all(const long &nb,
const std::vector<float> &xb,
const std::vector<long> &ids,
const long &nt = 0,
const std::vector<float> &xt = std::vector<float>());
// NOTE(review): train(), add() and set_build_option() are only declared here;
// no definitions are visible in this change - confirm they exist before calling.
void train(const long &nt,
const std::vector<float> &xt);
Index_ptr add(const long &nb,
const std::vector<float> &xb,
const std::vector<long> &ids);
void set_build_option(const Operand_ptr &opd);
private:
// Build parameters supplied at construction.
Operand_ptr opd_ = nullptr;
// std::shared_ptr<faiss::Index> index_ = nullptr;
};
// Shared-ownership handle to an IndexBuilder.
using IndexBuilderPtr = std::shared_ptr<IndexBuilder>;
// Returns a new IndexBuilder constructed from `opd`.
extern IndexBuilderPtr GetIndexBuilder(const Operand_ptr &opd);
}
}
}

View File

@ -0,0 +1,49 @@
////////////////////////////////////////////////////////////////////////////////
// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved
// Unauthorized copying of this file, via any medium is strictly prohibited.
// Proprietary and confidential.
////////////////////////////////////////////////////////////////////////////////
#include "Operand.h"
namespace zilliz {
namespace vecwise {
namespace engine {
/**
 * Writes the operand fields separated by single spaces, in the order
 * d, index_type, preproc, postproc, metric_type, ncent.
 *
 * NOTE(review): an empty string field is written as nothing between two
 * spaces, so whitespace-delimited extraction (operator>>) cannot tell it was
 * there - confirm whether empty preproc/postproc must round-trip.
 */
std::ostream &operator<<(std::ostream &os, const Operand &obj) {
    os << obj.d << " ";
    os << obj.index_type << " ";
    os << obj.preproc << " ";
    os << obj.postproc << " ";
    os << obj.metric_type << " ";
    os << obj.ncent;
    return os;
}
/**
 * Reads the operand fields as whitespace-delimited tokens, in the order
 * d, index_type, preproc, postproc, metric_type, ncent.
 *
 * The stream's state is left for the caller to inspect; nothing here checks
 * whether every field was actually extracted.
 */
std::istream &operator>>(std::istream &is, Operand &obj) {
    is >> obj.d;
    is >> obj.index_type;
    is >> obj.preproc;
    is >> obj.postproc;
    is >> obj.metric_type;
    is >> obj.ncent;
    return is;
}
std::string operand_to_str(const Operand_ptr &opd) {
std::ostringstream ss;
ss << opd;
return ss.str();
}
/**
 * Builds a new Operand and fills it by parsing `input` with operator>>.
 *
 * @param input text to parse
 * @return the new operand; the stream state is not checked, so a failed parse
 *         still returns an operand
 */
Operand_ptr str_to_operand(const std::string &input) {
    Operand_ptr parsed = std::make_shared<Operand>();
    std::istringstream reader(input);
    reader >> *parsed;
    return parsed;
}
}
}
}

40
cpp/src/wrapper/Operand.h Normal file
View File

@ -0,0 +1,40 @@
////////////////////////////////////////////////////////////////////////////////
// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved
// Unauthorized copying of this file, via any medium is strictly prohibited.
// Proprietary and confidential.
////////////////////////////////////////////////////////////////////////////////
#pragma once
#include <string>
#include <memory>
#include <iostream>
#include <sstream>
namespace zilliz {
namespace vecwise {
namespace engine {
/**
 * Index build parameters. Streamable through operator<< / operator>> as a
 * space-separated field list.
 */
struct Operand {
    friend std::ostream &operator<<(std::ostream &os, const Operand &obj);
    friend std::istream &operator>>(std::istream &is, Operand &obj);

    // Vector dimension; zero until the caller sets it (previously indeterminate
    // for a default-initialized Operand).
    int d = 0;
    // Index description string.
    // NOTE(review): 13864 looks like a transposition of 16384 - confirm the intended default.
    std::string index_type = "IVF13864,Flat";
    std::string preproc;
    std::string postproc;
    std::string metric_type = "L2"; // L2 / Inner Product
    // NOTE(review): presumably the number of centroids - confirm. Zero until set.
    int ncent = 0;
};
/// Shared-ownership handle to an Operand.
using Operand_ptr = std::shared_ptr<Operand>;
/// Converts an operand handle to its string form.
extern std::string operand_to_str(const Operand_ptr &opd);
/// Builds a new Operand by parsing `input`.
extern Operand_ptr str_to_operand(const std::string &input);
}
}
}

View File

@ -17,4 +17,5 @@ set(unittest_libs
pthread)
add_subdirectory(cache)
add_subdirectory(log)
add_subdirectory(log)
add_subdirectory(faiss_wrapper)

View File

@ -0,0 +1,26 @@
#-------------------------------------------------------------------------------
# Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved
# Unauthorized copying of this file, via any medium is strictly prohibited.
# Proprietary and confidential.
#-------------------------------------------------------------------------------
# Build script for the wrapper_test executable.
# The wrapper sources are compiled directly into the test binary rather than
# linked from a library.
include_directories(../../src)
aux_source_directory(../../src/wrapper wrapper_src)
# Make sure that your call to link_directories takes place before your call to the relevant add_executable.
include_directories(/usr/local/cuda/include)
link_directories("/usr/local/cuda/lib64")
# unittest_srcs is not defined in this file. NOTE(review): presumably set by the parent CMakeLists - confirm.
set(wrapper_test_src
${unittest_srcs}
${wrapper_src}
wrapper_test.cpp)
add_executable(wrapper_test ${wrapper_test_src})
# faiss plus the CUDA libraries, resolved through the link directory added above.
set(faiss_libs
faiss
cudart
cublas
)
# unittest_libs is likewise expected to come from outside this file.
target_link_libraries(wrapper_test ${unittest_libs} ${faiss_libs})

View File

@ -0,0 +1,58 @@
////////////////////////////////////////////////////////////////////////////////
// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved
// Unauthorized copying of this file, via any medium is strictly prohibited.
// Proprietary and confidential.
////////////////////////////////////////////////////////////////////////////////
#include <gtest/gtest.h>
#include "wrapper/Operand.h"
#include "wrapper/Index.h"
#include "wrapper/IndexBuilder.h"
using namespace zilliz::vecwise::engine;
// Checks the text form produced by streaming an Operand.
TEST(operand_test, Wrapper_Test) {
    auto opd = std::make_shared<Operand>();
    opd->index_type = "IVF16384,Flat";
    opd->d = 256;

    // Stream the pointee; streaming the shared_ptr would print its address.
    std::ostringstream rendered;
    rendered << *opd;

    // Fields are space-separated; preproc and postproc are empty here, which
    // yields the run of three spaces. ncent is zero because make_shared
    // value-initializes the Operand.
    EXPECT_EQ(rendered.str(), "256 IVF16384,Flat   L2 0");

    std::cout << *opd << std::endl;
}
// Builds an IVF index from random data through IndexBuilder::build_all.
TEST(build_test, Wrapper_Test) {
    // dimension of the vectors to index
    int d = 64;

    // size of the database we plan to index
    size_t nb = 100000;

    // make a set of nt training vectors in the unit cube
    size_t nt = 150000;

    // a reasonable number of centroids to index nb vectors
    int ncentroids = 25;

    srand48(35); // seed

    std::vector<float> xb(nb * d);
    for (size_t i = 0; i < nb * d; i++) {
        xb[i] = static_cast<float>(drand48());
    }

    // Distinct ids: converting drand48() (a double in [0, 1)) to long would
    // give every vector the id 0.
    std::vector<long> ids(nb);
    for (size_t i = 0; i < nb; i++) {
        ids[i] = static_cast<long>(i);
    }

    std::vector<float> xt(nt * d);
    for (size_t i = 0; i < nt * d; i++) {
        xt[i] = static_cast<float>(drand48());
    }

    // Describe the index actually intended by the constants above; the
    // dimension must match the generated data.
    auto opd = std::make_shared<Operand>();
    opd->d = d;
    opd->ncent = ncentroids;
    opd->index_type = "IVF" + std::to_string(ncentroids) + ",Flat";

    IndexBuilderPtr index_builder_1 = GetIndexBuilder(opd);
    auto index_1 = index_builder_1->build_all(nb, xb, ids, nt, xt);

    ASSERT_TRUE(index_1 != nullptr);
    EXPECT_EQ(index_1->dim, d);
}