From 26f182c7afdf9851ae1cac4a4cf41fd35804119d Mon Sep 17 00:00:00 2001 From: groot Date: Sun, 30 Jun 2019 18:53:43 +0800 Subject: [PATCH] refine unitest code Former-commit-id: 7a78a5d5bd966d34d9ccd6f52e3b1c533da3a1a0 --- cpp/unittest/db/db_tests.cpp | 258 +------------------------- cpp/unittest/db/mysql_db_test.cpp | 293 ++++++++++++++++++++++++++++++ 2 files changed, 299 insertions(+), 252 deletions(-) create mode 100644 cpp/unittest/db/mysql_db_test.cpp diff --git a/cpp/unittest/db/db_tests.cpp b/cpp/unittest/db/db_tests.cpp index 8e50b7403b..d505320e86 100644 --- a/cpp/unittest/db/db_tests.cpp +++ b/cpp/unittest/db/db_tests.cpp @@ -20,6 +20,8 @@ namespace { static const std::string TABLE_NAME = "test_group"; static constexpr int64_t TABLE_DIM = 256; + static constexpr int64_t VECTOR_COUNT = 250000; + static constexpr int64_t INSERT_LOOP = 100000; engine::meta::TableSchema BuildTableSchema() { engine::meta::TableSchema table_info; @@ -144,7 +146,7 @@ TEST_F(DBTest, DB_TEST) { } }); - int loop = 100000; + int loop = INSERT_LOOP; for (auto i=0; i xb(nb*TABLE_DIM); @@ -246,7 +248,7 @@ TEST_F(DBTest2, ARHIVE_DISK_CHECK) { std::vector xb; BuildVectors(nb, xb); - int loop = 100000; + int loop = INSERT_LOOP; for (auto i=0; iInsertVectors(TABLE_NAME, nb, xb.data(), vector_ids); std::this_thread::sleep_for(std::chrono::microseconds(1)); @@ -277,7 +279,7 @@ TEST_F(DBTest2, DELETE_TEST) { uint64_t size; db_->Size(size); - int64_t nb = 100000; + int64_t nb = INSERT_LOOP; std::vector xb; BuildVectors(nb, xb); @@ -293,251 +295,3 @@ TEST_F(DBTest2, DELETE_TEST) { ASSERT_TRUE(stat.ok()); ASSERT_FALSE(boost::filesystem::exists(table_info_get.location_)); }; - -TEST_F(MySQLDBTest, DB_TEST) { - - auto options = GetOptions(); - auto db_ = engine::DBFactory::Build(options); - - engine::meta::TableSchema table_info = BuildTableSchema(); - engine::Status stat = db_->CreateTable(table_info); - - engine::meta::TableSchema table_info_get; - table_info_get.table_id_ = TABLE_NAME; - stat = db_->DescribeTable(table_info_get); - ASSERT_STATS(stat); - ASSERT_EQ(table_info_get.dimension_, TABLE_DIM); - - engine::IDNumbers vector_ids; - engine::IDNumbers target_ids; - - int64_t nb = 50; - std::vector xb; - BuildVectors(nb, xb); - - int64_t qb = 5; - std::vector qxb; - BuildVectors(qb, qxb); - - std::thread search([&]() { - engine::QueryResults results; - int k = 10; - std::this_thread::sleep_for(std::chrono::seconds(2)); - - INIT_TIMER; - std::stringstream ss; - uint64_t count = 0; - uint64_t prev_count = 0; - - for (auto j=0; j<10; ++j) { - ss.str(""); - db_->Size(count); - prev_count = count; - - START_TIMER; - stat = db_->Query(TABLE_NAME, k, qb, qxb.data(), results); - ss << "Search " << j << " With Size " << count/engine::meta::M << " M"; - STOP_TIMER(ss.str()); - - ASSERT_STATS(stat); - for (auto k=0; k= prev_count); - std::this_thread::sleep_for(std::chrono::seconds(1)); - } - }); - - int loop = 100000; - - for (auto i=0; iInsertVectors(TABLE_NAME, qb, qxb.data(), target_ids); - ASSERT_EQ(target_ids.size(), qb); - } else { - db_->InsertVectors(TABLE_NAME, nb, xb.data(), vector_ids); - } - std::this_thread::sleep_for(std::chrono::microseconds(1)); - } - - search.join(); - - delete db_; - - auto dummyDB = engine::DBFactory::Build(options); - dummyDB->DropAll(); - delete dummyDB; -}; - -TEST_F(MySQLDBTest, SEARCH_TEST) { - auto options = GetOptions(); - auto db_ = engine::DBFactory::Build(options); - - engine::meta::TableSchema table_info = BuildTableSchema(); - engine::Status stat = db_->CreateTable(table_info); - - engine::meta::TableSchema table_info_get; - table_info_get.table_id_ = TABLE_NAME; - stat = db_->DescribeTable(table_info_get); - ASSERT_STATS(stat); - ASSERT_EQ(table_info_get.dimension_, TABLE_DIM); - - // prepare raw data - size_t nb = 250000; - size_t nq = 10; - size_t k = 5; - std::vector xb(nb*TABLE_DIM); - std::vector xq(nq*TABLE_DIM); - std::vector ids(nb); - - std::random_device rd; - std::mt19937 gen(rd()); - std::uniform_real_distribution<> dis_xt(-1.0, 1.0); - for (size_t i = 0; i < nb*TABLE_DIM; i++) { - xb[i] = dis_xt(gen); - if (i < nb){ - ids[i] = i; - } - } - for (size_t i = 0; i < nq*TABLE_DIM; i++) { - xq[i] = dis_xt(gen); - } - - // result data - //std::vector nns_gt(k*nq); - std::vector nns(k*nq); // nns = nearst neg search - //std::vector dis_gt(k*nq); - std::vector dis(k*nq); - - // insert data - const int batch_size = 100; - for (int j = 0; j < nb / batch_size; ++j) { - stat = db_->InsertVectors(TABLE_NAME, batch_size, xb.data()+batch_size*j*TABLE_DIM, ids); - if (j == 200){ sleep(1);} - ASSERT_STATS(stat); - } - - sleep(2); // wait until build index finish - - engine::QueryResults results; - stat = db_->Query(TABLE_NAME, k, nq, xq.data(), results); - ASSERT_STATS(stat); - - delete db_; - - auto dummyDB = engine::DBFactory::Build(options); - dummyDB->DropAll(); - delete dummyDB; - - // TODO(linxj): add groundTruth assert -}; - -TEST_F(MySQLDBTest, ARHIVE_DISK_CHECK) { - - auto options = GetOptions(); - options.meta.archive_conf = engine::ArchiveConf("delete", "disk:1"); - auto db_ = engine::DBFactory::Build(options); - - engine::meta::TableSchema table_info = BuildTableSchema(); - engine::Status stat = db_->CreateTable(table_info); - - std::vector table_schema_array; - stat = db_->AllTables(table_schema_array); - ASSERT_STATS(stat); - bool bfound = false; - for(auto& schema : table_schema_array) { - if(schema.table_id_ == TABLE_NAME) { - bfound = true; - break; - } - } - ASSERT_TRUE(bfound); - - engine::meta::TableSchema table_info_get; - table_info_get.table_id_ = TABLE_NAME; - stat = db_->DescribeTable(table_info_get); - ASSERT_STATS(stat); - ASSERT_EQ(table_info_get.dimension_, TABLE_DIM); - - engine::IDNumbers vector_ids; - engine::IDNumbers target_ids; - - uint64_t size; - db_->Size(size); - - int64_t nb = 10; - std::vector xb; - BuildVectors(nb, xb); - - int loop = 100000; - for (auto i=0; iInsertVectors(TABLE_NAME, nb, xb.data(), vector_ids); - std::this_thread::sleep_for(std::chrono::microseconds(1)); - } - - std::this_thread::sleep_for(std::chrono::seconds(1)); - - db_->Size(size); - LOG(DEBUG) << "size=" << size; - ASSERT_LE(size, 1 * engine::meta::G); - - delete db_; - - auto dummyDB = engine::DBFactory::Build(options); - dummyDB->DropAll(); - delete dummyDB; -}; - -TEST_F(MySQLDBTest, DELETE_TEST) { - - auto options = GetOptions(); - options.meta.archive_conf = engine::ArchiveConf("delete", "disk:1"); - auto db_ = engine::DBFactory::Build(options); - - engine::meta::TableSchema table_info = BuildTableSchema(); - engine::Status stat = db_->CreateTable(table_info); -// std::cout << stat.ToString() << std::endl; - - engine::meta::TableSchema table_info_get; - table_info_get.table_id_ = TABLE_NAME; - stat = db_->DescribeTable(table_info_get); - ASSERT_STATS(stat); - -// std::cout << "location: " << table_info_get.location_ << std::endl; - ASSERT_TRUE(boost::filesystem::exists(table_info_get.location_)); - - engine::IDNumbers vector_ids; - - uint64_t size; - db_->Size(size); - - int64_t nb = 100000; - std::vector xb; - BuildVectors(nb, xb); - - int loop = 20; - for (auto i=0; iInsertVectors(TABLE_NAME, nb, xb.data(), vector_ids); - std::this_thread::sleep_for(std::chrono::microseconds(1)); - } - - std::vector dates; - stat = db_->DeleteTable(TABLE_NAME, dates); -// std::cout << "5 sec start" << std::endl; - std::this_thread::sleep_for(std::chrono::seconds(5)); -// std::cout << "5 sec finish" << std::endl; - ASSERT_TRUE(stat.ok()); -// ASSERT_FALSE(boost::filesystem::exists(table_info_get.location_)); - - delete db_; - - auto dummyDB = engine::DBFactory::Build(options); - dummyDB->DropAll(); - delete dummyDB; -}; diff --git a/cpp/unittest/db/mysql_db_test.cpp b/cpp/unittest/db/mysql_db_test.cpp new file mode 100644 index 0000000000..db3c84751e --- /dev/null +++ b/cpp/unittest/db/mysql_db_test.cpp @@ -0,0 +1,293 @@ +//////////////////////////////////////////////////////////////////////////////// +// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved +// Unauthorized copying of this file, via any medium is strictly prohibited. +// Proprietary and confidential. +//////////////////////////////////////////////////////////////////////////////// +#include +#include +#include +#include + +#include "utils.h" +#include "db/DB.h" +#include "db/DBImpl.h" +#include "db/MetaConsts.h" +#include "db/Factories.h" + +using namespace zilliz::milvus; + +namespace { + +static const std::string TABLE_NAME = "test_group"; +static constexpr int64_t TABLE_DIM = 256; +static constexpr int64_t VECTOR_COUNT = 250000; +static constexpr int64_t INSERT_LOOP = 100000; + +engine::meta::TableSchema BuildTableSchema() { + engine::meta::TableSchema table_info; + table_info.dimension_ = TABLE_DIM; + table_info.table_id_ = TABLE_NAME; + table_info.engine_type_ = (int)engine::EngineType::FAISS_IDMAP; + return table_info; +} + +void BuildVectors(int64_t n, std::vector& vectors) { + vectors.clear(); + vectors.resize(n*TABLE_DIM); + float* data = vectors.data(); + for(int i = 0; i < n; i++) { + for(int j = 0; j < TABLE_DIM; j++) data[TABLE_DIM * i + j] = drand48(); + data[TABLE_DIM * i] += i / 2000.; + } +} + +} + + +TEST_F(MySQLDBTest, DB_TEST) { + + auto options = GetOptions(); + auto db_ = engine::DBFactory::Build(options); + + engine::meta::TableSchema table_info = BuildTableSchema(); + engine::Status stat = db_->CreateTable(table_info); + + engine::meta::TableSchema table_info_get; + table_info_get.table_id_ = TABLE_NAME; + stat = db_->DescribeTable(table_info_get); + ASSERT_STATS(stat); + ASSERT_EQ(table_info_get.dimension_, TABLE_DIM); + + engine::IDNumbers vector_ids; + engine::IDNumbers target_ids; + + int64_t nb = 50; + std::vector xb; + BuildVectors(nb, xb); + + int64_t qb = 5; + std::vector qxb; + BuildVectors(qb, qxb); + + std::thread search([&]() { + engine::QueryResults results; + int k = 10; + std::this_thread::sleep_for(std::chrono::seconds(2)); + + INIT_TIMER; + std::stringstream ss; + uint64_t count = 0; + uint64_t prev_count = 0; + + for (auto j=0; j<10; ++j) { + ss.str(""); + db_->Size(count); + prev_count = count; + + START_TIMER; + stat = db_->Query(TABLE_NAME, k, qb, qxb.data(), results); + ss << "Search " << j << " With Size " << count/engine::meta::M << " M"; + STOP_TIMER(ss.str()); + + ASSERT_STATS(stat); + for (auto k=0; k= prev_count); + std::this_thread::sleep_for(std::chrono::seconds(1)); + } + }); + + int loop = INSERT_LOOP; + + for (auto i=0; iInsertVectors(TABLE_NAME, qb, qxb.data(), target_ids); + ASSERT_EQ(target_ids.size(), qb); + } else { + db_->InsertVectors(TABLE_NAME, nb, xb.data(), vector_ids); + } + std::this_thread::sleep_for(std::chrono::microseconds(1)); + } + + search.join(); + + delete db_; + + auto dummyDB = engine::DBFactory::Build(options); + dummyDB->DropAll(); + delete dummyDB; +}; + +TEST_F(MySQLDBTest, SEARCH_TEST) { + auto options = GetOptions(); + auto db_ = engine::DBFactory::Build(options); + + engine::meta::TableSchema table_info = BuildTableSchema(); + engine::Status stat = db_->CreateTable(table_info); + + engine::meta::TableSchema table_info_get; + table_info_get.table_id_ = TABLE_NAME; + stat = db_->DescribeTable(table_info_get); + ASSERT_STATS(stat); + ASSERT_EQ(table_info_get.dimension_, TABLE_DIM); + + // prepare raw data + size_t nb = VECTOR_COUNT; + size_t nq = 10; + size_t k = 5; + std::vector xb(nb*TABLE_DIM); + std::vector xq(nq*TABLE_DIM); + std::vector ids(nb); + + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_real_distribution<> dis_xt(-1.0, 1.0); + for (size_t i = 0; i < nb*TABLE_DIM; i++) { + xb[i] = dis_xt(gen); + if (i < nb){ + ids[i] = i; + } + } + for (size_t i = 0; i < nq*TABLE_DIM; i++) { + xq[i] = dis_xt(gen); + } + + // result data + //std::vector nns_gt(k*nq); + std::vector nns(k*nq); // nns = nearst neg search + //std::vector dis_gt(k*nq); + std::vector dis(k*nq); + + // insert data + const int batch_size = 100; + for (int j = 0; j < nb / batch_size; ++j) { + stat = db_->InsertVectors(TABLE_NAME, batch_size, xb.data()+batch_size*j*TABLE_DIM, ids); + if (j == 200){ sleep(1);} + ASSERT_STATS(stat); + } + + sleep(2); // wait until build index finish + + engine::QueryResults results; + stat = db_->Query(TABLE_NAME, k, nq, xq.data(), results); + ASSERT_STATS(stat); + + delete db_; + + auto dummyDB = engine::DBFactory::Build(options); + dummyDB->DropAll(); + delete dummyDB; + + // TODO(linxj): add groundTruth assert +}; + +TEST_F(MySQLDBTest, ARHIVE_DISK_CHECK) { + + auto options = GetOptions(); + options.meta.archive_conf = engine::ArchiveConf("delete", "disk:1"); + auto db_ = engine::DBFactory::Build(options); + + engine::meta::TableSchema table_info = BuildTableSchema(); + engine::Status stat = db_->CreateTable(table_info); + + std::vector table_schema_array; + stat = db_->AllTables(table_schema_array); + ASSERT_STATS(stat); + bool bfound = false; + for(auto& schema : table_schema_array) { + if(schema.table_id_ == TABLE_NAME) { + bfound = true; + break; + } + } + ASSERT_TRUE(bfound); + + engine::meta::TableSchema table_info_get; + table_info_get.table_id_ = TABLE_NAME; + stat = db_->DescribeTable(table_info_get); + ASSERT_STATS(stat); + ASSERT_EQ(table_info_get.dimension_, TABLE_DIM); + + engine::IDNumbers vector_ids; + engine::IDNumbers target_ids; + + uint64_t size; + db_->Size(size); + + int64_t nb = 10; + std::vector xb; + BuildVectors(nb, xb); + + int loop = INSERT_LOOP; + for (auto i=0; iInsertVectors(TABLE_NAME, nb, xb.data(), vector_ids); + std::this_thread::sleep_for(std::chrono::microseconds(1)); + } + + std::this_thread::sleep_for(std::chrono::seconds(1)); + + db_->Size(size); + LOG(DEBUG) << "size=" << size; + ASSERT_LE(size, 1 * engine::meta::G); + + delete db_; + + auto dummyDB = engine::DBFactory::Build(options); + dummyDB->DropAll(); + delete dummyDB; +}; + +TEST_F(MySQLDBTest, DELETE_TEST) { + + auto options = GetOptions(); + options.meta.archive_conf = engine::ArchiveConf("delete", "disk:1"); + auto db_ = engine::DBFactory::Build(options); + + engine::meta::TableSchema table_info = BuildTableSchema(); + engine::Status stat = db_->CreateTable(table_info); +// std::cout << stat.ToString() << std::endl; + + engine::meta::TableSchema table_info_get; + table_info_get.table_id_ = TABLE_NAME; + stat = db_->DescribeTable(table_info_get); + ASSERT_STATS(stat); + +// std::cout << "location: " << table_info_get.location_ << std::endl; + ASSERT_TRUE(boost::filesystem::exists(table_info_get.location_)); + + engine::IDNumbers vector_ids; + + uint64_t size; + db_->Size(size); + + int64_t nb = INSERT_LOOP; + std::vector xb; + BuildVectors(nb, xb); + + int loop = 20; + for (auto i=0; iInsertVectors(TABLE_NAME, nb, xb.data(), vector_ids); + std::this_thread::sleep_for(std::chrono::microseconds(1)); + } + + std::vector dates; + stat = db_->DeleteTable(TABLE_NAME, dates); +// std::cout << "5 sec start" << std::endl; + std::this_thread::sleep_for(std::chrono::seconds(5)); +// std::cout << "5 sec finish" << std::endl; + ASSERT_TRUE(stat.ok()); +// ASSERT_FALSE(boost::filesystem::exists(table_info_get.location_)); + + delete db_; + + auto dummyDB = engine::DBFactory::Build(options); + dummyDB->DropAll(); + delete dummyDB; +};