// Patch summary (review preamble; the patch text below is unchanged).
//
// Target: cpp/unittest/faiss_wrapper/wrapper_test.cpp, single hunk @@ -9,144 +9,130 @@.
//
// Removed by this patch:
//   * one `#include` line (the header name is not visible in this capture);
//   * TEST(xxx, Wrapper_Test), which ran std::regex_match with a URI-shaped pattern
//     against the string "dixx://" and printed every sub-match;
//   * the commented-out copies of operand_test, build_test and gpu_build_test.
//
// Added by this patch (the same three tests, now uncommented):
//   * TEST(operand_test, Wrapper_Test): fills an operand (index_type "IVF", preproc "OPQ",
//     postproc "PQ", metric_type "L2", d = 64), round-trips it through operand_to_str /
//     str_to_operand and compares get_index_type(10000) of the copy with the original.
//   * TEST(build_test, Wrapper_Test): d = 3, ncent = 16; fills nt * d training values from
//     uniform_real_distribution(-1.0, 1.0), calls build_all(0, xb, ids, nt, xt), then
//     add_with_ids(nb, ...) with nb = 100000, searches k = 10 results for {0.5, 0.5, 0.5}
//     and requires every returned vector to have three positive coordinates.
//   * TEST(gpu_build_test, Wrapper_Test): d = 256, nb = 3 * 1000 * 100, ncent = 256; calls
//     build_all(nb, xb.data(), ids.data()), checks ntotal and dim, then searches the first
//     five vectors of xb with k = 1 and expects I[i] == i.
//
// NOTE(review): the patch's line breaks appear to have been collapsed, and template argument
//   lists look stripped (`std::make_shared()`, `std::vector xb`, bare `#include`) -- presumably
//   lost when the patch was captured; confirm against the repository before applying.
// NOTE(review): build_test calls xb.resize(nb) but later reads xb[id * 3 + 2] and uses d = 3;
//   looks like xb is expected to hold nb * d values -- TODO confirm.
// NOTE(review): operand_test and gpu_build_test check results with plain assert(), which is
//   compiled out when NDEBUG is defined; build_test uses ASSERT_TRUE instead.
// NOTE(review): build_test allocates result_dists / result_ids with new[] and frees them only
//   after the loop; a failing ASSERT_TRUE leaves the test body early, so the delete[] calls
//   would be skipped -- confirm whether that matters for this suite.
// NOTE(review): gpu_build_test fills xq (sized d * nq) but the only search call passes
//   xb.data(); xq and nq look unused -- confirm.
diff --git a/cpp/unittest/faiss_wrapper/wrapper_test.cpp b/cpp/unittest/faiss_wrapper/wrapper_test.cpp index c1b4ff2cb0..67a6c3cde8 100644 --- a/cpp/unittest/faiss_wrapper/wrapper_test.cpp +++ b/cpp/unittest/faiss_wrapper/wrapper_test.cpp @@ -9,144 +9,130 @@ #include "wrapper/Operand.h" #include "wrapper/Index.h" #include "wrapper/IndexBuilder.h" -#include + using namespace zilliz::milvus::engine; -TEST(xxx, Wrapper_Test){ - // std::string xxx = "dialect+driver://username:password@host:port/database"; - //mysql://scott:tiger@localhost/mydatabase +TEST(operand_test, Wrapper_Test) { + using std::cout; + using std::endl; - std::string littel_xx = "dixx://"; - std::regex xx_regex("([a-zA-Z0-9_-\\.]*):\\/\\/([a-zA-Z0-9_-\\.]*)\\:([a-zA-Z0-9_-\\.]*)\\@([a-zA-Z0-9_-\\.]*)\\:([0-9]*)\\/([a-zA-Z0-9_-\\.]*)"); - std::smatch pieces_match; - std::regex_match(littel_xx, pieces_match, xx_regex); + auto opd = std::make_shared(); + opd->index_type = "IVF"; + opd->preproc = "OPQ"; + opd->postproc = "PQ"; + opd->metric_type = "L2"; + opd->d = 64; - for(auto &x : pieces_match) { - std::cout << "hehhe: " << x.str() << std::endl; - } + auto opd_str = operand_to_str(opd); + auto new_opd = str_to_operand(opd_str); + + // TODO: fix all place where using opd to build index. + assert(new_opd->get_index_type(10000) == opd->get_index_type(10000)); } -//TEST(operand_test, Wrapper_Test) { -// using std::cout; -// using std::endl; -// -// auto opd = std::make_shared(); -// opd->index_type = "IVF"; -// opd->preproc = "OPQ"; -// opd->postproc = "PQ"; -// opd->metric_type = "L2"; -// opd->d = 64; -// -// auto opd_str = operand_to_str(opd); -// auto new_opd = str_to_operand(opd_str); -// -// // TODO: fix all place where using opd to build index. 
-// assert(new_opd->get_index_type(10000) == opd->get_index_type(10000)); -//} -// -//TEST(build_test, Wrapper_Test) { -// // dimension of the vectors to index -// int d = 3; -// -// // make a set of nt training vectors in the unit cube -// size_t nt = 10000; -// -// // a reasonable number of cetroids to index nb vectors -// int ncentroids = 16; -// -// std::random_device rd; -// std::mt19937 gen(rd()); -// -// std::vector xb; -// std::vector ids; -// -// //prepare train data -// std::uniform_real_distribution<> dis_xt(-1.0, 1.0); -// std::vector xt(nt * d); -// for (size_t i = 0; i < nt * d; i++) { -// xt[i] = dis_xt(gen); -// } -// -// //train the index -// auto opd = std::make_shared(); -// opd->index_type = "IVF"; -// opd->d = d; -// opd->ncent = ncentroids; -// IndexBuilderPtr index_builder_1 = GetIndexBuilder(opd); -// auto index_1 = index_builder_1->build_all(0, xb, ids, nt, xt); -// ASSERT_TRUE(index_1 != nullptr); -// -// // size of the database we plan to index -// size_t nb = 100000; -// -// //prepare raw data -// xb.resize(nb); -// ids.resize(nb); -// for (size_t i = 0; i < nb; i++) { -// xb[i] = dis_xt(gen); -// ids[i] = i; -// } -// index_1->add_with_ids(nb, xb.data(), ids.data()); -// -// //search in first quadrant -// int nq = 1, k = 10; -// std::vector xq = {0.5, 0.5, 0.5}; -// float *result_dists = new float[k]; -// long *result_ids = new long[k]; -// index_1->search(nq, xq.data(), k, result_dists, result_ids); -// -// for (int i = 0; i < k; i++) { -// if (result_ids[i] < 0) { -// ASSERT_TRUE(false); -// break; -// } -// -// long id = result_ids[i]; -// std::cout << "No." 
<< id << " [" << xb[id * 3] << ", " << xb[id * 3 + 1] << ", " -// << xb[id * 3 + 2] << "] distance = " << result_dists[i] << std::endl; -// -// //makesure result vector is in first quadrant -// ASSERT_TRUE(xb[id * 3] > 0.0); -// ASSERT_TRUE(xb[id * 3 + 1] > 0.0); -// ASSERT_TRUE(xb[id * 3 + 2] > 0.0); -// } -// -// delete[] result_dists; -// delete[] result_ids; -//} -// -//TEST(gpu_build_test, Wrapper_Test) { -// using std::vector; -// -// int d = 256; -// int nb = 3 * 1000 * 100; -// int nq = 100; -// vector xb(d * nb); -// vector xq(d * nq); -// vector ids(nb); -// -// std::random_device rd; -// std::mt19937 gen(rd()); -// std::uniform_real_distribution<> dis_xt(-1.0, 1.0); -// for (auto &e : xb) { e = float(dis_xt(gen)); } -// for (auto &e : xq) { e = float(dis_xt(gen)); } -// for (int i = 0; i < nb; ++i) { ids[i] = i; } -// -// auto opd = std::make_shared(); -// opd->index_type = "IVF"; -// opd->d = d; -// opd->ncent = 256; -// -// IndexBuilderPtr index_builder_1 = GetIndexBuilder(opd); -// auto index_1 = index_builder_1->build_all(nb, xb.data(), ids.data()); -// assert(index_1->ntotal == nb); -// assert(index_1->dim == d); -// -// // sanity check: search 5 first vectors of xb -// int k = 1; -// vector I(5 * k); -// vector D(5 * k); -// index_1->search(5, xb.data(), k, D.data(), I.data()); -// for (int i = 0; i < 5; ++i) { assert(i == I[i]); } -//} +TEST(build_test, Wrapper_Test) { + // dimension of the vectors to index + int d = 3; + + // make a set of nt training vectors in the unit cube + size_t nt = 10000; + + // a reasonable number of cetroids to index nb vectors + int ncentroids = 16; + + std::random_device rd; + std::mt19937 gen(rd()); + + std::vector xb; + std::vector ids; + + //prepare train data + std::uniform_real_distribution<> dis_xt(-1.0, 1.0); + std::vector xt(nt * d); + for (size_t i = 0; i < nt * d; i++) { + xt[i] = dis_xt(gen); + } + + //train the index + auto opd = std::make_shared(); + opd->index_type = "IVF"; + opd->d = d; + opd->ncent = 
ncentroids; + IndexBuilderPtr index_builder_1 = GetIndexBuilder(opd); + auto index_1 = index_builder_1->build_all(0, xb, ids, nt, xt); + ASSERT_TRUE(index_1 != nullptr); + + // size of the database we plan to index + size_t nb = 100000; + + //prepare raw data + xb.resize(nb); + ids.resize(nb); + for (size_t i = 0; i < nb; i++) { + xb[i] = dis_xt(gen); + ids[i] = i; + } + index_1->add_with_ids(nb, xb.data(), ids.data()); + + //search in first quadrant + int nq = 1, k = 10; + std::vector xq = {0.5, 0.5, 0.5}; + float *result_dists = new float[k]; + long *result_ids = new long[k]; + index_1->search(nq, xq.data(), k, result_dists, result_ids); + + for (int i = 0; i < k; i++) { + if (result_ids[i] < 0) { + ASSERT_TRUE(false); + break; + } + + long id = result_ids[i]; + std::cout << "No." << id << " [" << xb[id * 3] << ", " << xb[id * 3 + 1] << ", " + << xb[id * 3 + 2] << "] distance = " << result_dists[i] << std::endl; + + //makesure result vector is in first quadrant + ASSERT_TRUE(xb[id * 3] > 0.0); + ASSERT_TRUE(xb[id * 3 + 1] > 0.0); + ASSERT_TRUE(xb[id * 3 + 2] > 0.0); + } + + delete[] result_dists; + delete[] result_ids; +} + +TEST(gpu_build_test, Wrapper_Test) { + using std::vector; + + int d = 256; + int nb = 3 * 1000 * 100; + int nq = 100; + vector xb(d * nb); + vector xq(d * nq); + vector ids(nb); + + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_real_distribution<> dis_xt(-1.0, 1.0); + for (auto &e : xb) { e = float(dis_xt(gen)); } + for (auto &e : xq) { e = float(dis_xt(gen)); } + for (int i = 0; i < nb; ++i) { ids[i] = i; } + + auto opd = std::make_shared(); + opd->index_type = "IVF"; + opd->d = d; + opd->ncent = 256; + + IndexBuilderPtr index_builder_1 = GetIndexBuilder(opd); + auto index_1 = index_builder_1->build_all(nb, xb.data(), ids.data()); + assert(index_1->ntotal == nb); + assert(index_1->dim == d); + + // sanity check: search 5 first vectors of xb + int k = 1; + vector I(5 * k); + vector D(5 * k); + index_1->search(5, xb.data(), 
k, D.data(), I.data()); + for (int i = 0; i < 5; ++i) { assert(i == I[i]); } +}