'update sdk'

Merge remote-tracking branch 'main/branch-0.3.0' into branch-0.3.0


Former-commit-id: 3347d7aaeab70533b4c31667d79020f55b9fa5d4
This commit is contained in:
yu yunfeng 2019-06-24 19:53:23 +08:00
commit 561229c48b
123 changed files with 5504 additions and 3630 deletions

7
cpp/.gitignore vendored
View File

@ -1,7 +1,4 @@
third_party/thrift-0.12.0/
third_party/faiss-1.5.1/
third_party/bzip2-1.0.6/
third_party/sqlite3/
megasearch/
milvus/
conf/server_config.yaml
version.h
megasearch/

View File

@ -2,14 +2,23 @@
Please mark all changes in the change log and use the ticket from JIRA.
# MegaSearch 0.3.0 (TBD)
# Milvus 0.3.0 (TBD)
## Bug
- MS-104 - Fix unittest lcov execution error
- MS-102 - Fix build script file condition error
- MS-80 - Fix server hang issue
- MS-89 - Fix compile failed, libgpufaiss.a link missing
- MS-90 - Fix arch match incorrect on ARM
- MS-99 - Fix compilation bug
## Improvement
- MS-82 - Update server startup welcome message
- MS-83 - Update vecwise to Milvus
- MS-77 - Performance issue of post-search action
- MS-22 - Enhancement for MemVector size control
- MS-92 - Unify behavior of debug and release build
- MS-98 - Install all unit test to installation directory
## New Feature
@ -29,11 +38,14 @@ Please mark all change in change log and use the ticket from JIRA.
- MS-81 - fix faiss ptx issue; change cuda gencode
- MS-84 - cmake: add arrow, jemalloc and jsoncons third party; default build option OFF
- MS-85 - add NetIO metric
- MS-96 - add new query interface for specified files
- MS-97 - Add S3 SDK for MinIO Storage
## Task
- MS-74 - Change README.md in cpp
- MS-88 - Add support for arm architecture
# MegaSearch 0.2.0 (2019-05-31)
# Milvus 0.2.0 (2019-05-31)
## Bug
@ -41,6 +53,8 @@ Please mark all change in change log and use the ticket from JIRA.
- MS-34 - Fix prometheus-cpp thirdparty
- MS-67 - Fix license check bug
- MS-76 - Fix pipeline crash bug
- MS-100 - cmake: fix AWS build issue
- MS-101 - change AWS build type to Release
## Improvement

View File

@ -24,8 +24,8 @@ if(NOT GIT_BRANCH_NAME STREQUAL "")
string(REGEX REPLACE "\n" "" GIT_BRANCH_NAME ${GIT_BRANCH_NAME})
endif()
set(MEGASEARCH_VERSION "${GIT_BRANCH_NAME}")
string(REGEX MATCH "[0-9]+\\.[0-9]+\\.[0-9]" MEGASEARCH_VERSION "${MEGASEARCH_VERSION}")
set(MILVUS_VERSION "${GIT_BRANCH_NAME}")
# Pull the MAJOR.MINOR.PATCH triple out of the branch name. Every component,
# including the patch level, may span several digits (e.g. "branch-1.2.10").
string(REGEX MATCH "[0-9]+\\.[0-9]+\\.[0-9]+" MILVUS_VERSION "${MILVUS_VERSION}")
if(CMAKE_BUILD_TYPE STREQUAL "Release")
set(BUILD_TYPE "release")
@ -34,8 +34,8 @@ else()
endif()
message(STATUS "Build type = ${BUILD_TYPE}")
project(megasearch VERSION "${MEGASEARCH_VERSION}")
project(vecwise_engine LANGUAGES CUDA CXX)
project(milvus VERSION "${MILVUS_VERSION}")
project(milvus_engine LANGUAGES CUDA CXX)
# Ensure that a default make is set
if("${MAKE}" STREQUAL "")
@ -44,26 +44,26 @@ if("${MAKE}" STREQUAL "")
endif()
endif()
set(MEGASEARCH_VERSION_MAJOR "${megasearch_VERSION_MAJOR}")
set(MEGASEARCH_VERSION_MINOR "${megasearch_VERSION_MINOR}")
set(MEGASEARCH_VERSION_PATCH "${megasearch_VERSION_PATCH}")
set(MILVUS_VERSION_MAJOR "${milvus_VERSION_MAJOR}")
set(MILVUS_VERSION_MINOR "${milvus_VERSION_MINOR}")
set(MILVUS_VERSION_PATCH "${milvus_VERSION_PATCH}")
if(MEGASEARCH_VERSION_MAJOR STREQUAL ""
OR MEGASEARCH_VERSION_MINOR STREQUAL ""
OR MEGASEARCH_VERSION_PATCH STREQUAL "")
message(WARNING "Failed to determine MegaSearch version from '${MEGASEARCH_VERSION}'")
set(MEGASEARCH_VERSION "unknown")
# Fall back to a fixed version when any of the MAJOR/MINOR/PATCH components is
# empty. The component variables are filled in together with MILVUS_VERSION so
# that later uses of the individual components are not left empty.
# NOTE: keep the three component values in sync with the fallback string.
if(MILVUS_VERSION_MAJOR STREQUAL ""
        OR MILVUS_VERSION_MINOR STREQUAL ""
        OR MILVUS_VERSION_PATCH STREQUAL "")
    message(WARNING "Failed to determine Milvus version from git branch name")
    set(MILVUS_VERSION "0.3.0")
    set(MILVUS_VERSION_MAJOR "0")
    set(MILVUS_VERSION_MINOR "3")
    set(MILVUS_VERSION_PATCH "0")
endif()
message(STATUS "Build version = ${MEGASEARCH_VERSION}")
message(STATUS "Build version = ${MILVUS_VERSION}")
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/version.h.macro ${CMAKE_CURRENT_SOURCE_DIR}/version.h)
message(STATUS "MegaSearch version: "
"${MEGASEARCH_VERSION_MAJOR}.${MEGASEARCH_VERSION_MINOR}.${MEGASEARCH_VERSION_PATCH} "
"(full: '${MEGASEARCH_VERSION}')")
message(STATUS "Milvus version: "
"${MILVUS_VERSION_MAJOR}.${MILVUS_VERSION_MINOR}.${MILVUS_VERSION_PATCH} "
"(full: '${MILVUS_VERSION}')")
set(MEGASEARCH_SOURCE_DIR ${PROJECT_SOURCE_DIR})
set(MEGASEARCH_BINARY_DIR ${PROJECT_BINARY_DIR})
set(MILVUS_SOURCE_DIR ${PROJECT_SOURCE_DIR})
set(MILVUS_BINARY_DIR ${PROJECT_BINARY_DIR})
find_package(CUDA)
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -Xcompiler -fPIC -std=c++11 -D_FORCE_INLINES -arch sm_60 --expt-extended-lambda")
@ -71,28 +71,27 @@ set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -O0 -g")
message("CUDA_TOOLKIT_ROOT_DIR=${CUDA_TOOLKIT_ROOT_DIR}")
message("CUDA_NVCC_FLAGS=${CUDA_NVCC_FLAGS}")
if (GPU_VERSION STREQUAL "ON")
add_definitions("-DGPU_VERSION")
endif ()
set(GPU_VERSION "ON")
add_definitions("-DGPU_VERSION")
set(CMAKE_CXX_STANDARD 14)
set(CMAKE_CXX_STANDARD_REQUIRED on)
if(CMAKE_SYSTEM_PROCESSOR MATCHES "(x86)|(X86)|(amd64)|(AMD64)")
message("building vecwise_engine on x86 architecture")
set(VECWISE_BUILD_ARCH x86_64)
message("building milvus_engine on x86 architecture")
set(MILVUS_BUILD_ARCH x86_64)
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "(ppc)")
message("building vecwise_engine on ppc architecture")
set(VECWISE_BUILD_ARCH ppc64le)
message("building milvus_engine on ppc architecture")
set(MILVUS_BUILD_ARCH ppc64le)
else()
message("unknown processor type")
message("CMAKE_SYSTEM_PROCESSOR=${CMAKE_SYSTEM_PROCESSOR}")
set(VECWISE_BUILD_ARCH unknown)
set(MILVUS_BUILD_ARCH unknown)
endif()
if(CMAKE_BUILD_TYPE STREQUAL "Release")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -fPIC -DELPP_THREAD_SAFE -fopenmp")
if (GPU_VERSION STREQUAL "ON")
if (CMAKE_LICENSE_CHECK STREQUAL "ON")
set(ENABLE_LICENSE "ON")
add_definitions("-DENABLE_LICENSE")
endif ()
@ -100,43 +99,42 @@ else()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O0 -g -fPIC -DELPP_THREAD_SAFE -fopenmp")
endif()
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
#if (BUILD_UNIT_TEST)
# option(MEGASEARCH_BUILD_TESTS "Build the megasearch test suite" ON)
#endif(BUILD_UNIT_TEST)
include(ExternalProject)
include(DefineOptions)
include(BuildUtils)
include(ThirdPartyPackages)
include_directories(${MEGASEARCH_SOURCE_DIR})
link_directories(${MEGASEARCH_BINARY_DIR})
include_directories(${MILVUS_SOURCE_DIR})
link_directories(${MILVUS_BINARY_DIR})
## The following settings should be checked
set(VECWISE_ENGINE_INCLUDE ${PROJECT_SOURCE_DIR}/include)
set(VECWISE_ENGINE_SRC ${PROJECT_SOURCE_DIR}/src)
#set(VECWISE_THIRD_PARTY ${CMAKE_CURRENT_SOURCE_DIR}/third_party)
#set(VECWISE_THIRD_PARTY_BUILD ${CMAKE_CURRENT_SOURCE_DIR}/third_party/build)
set(MILVUS_ENGINE_INCLUDE ${PROJECT_SOURCE_DIR}/include)
set(MILVUS_ENGINE_SRC ${PROJECT_SOURCE_DIR}/src)
#set(MILVUS_THIRD_PARTY ${CMAKE_CURRENT_SOURCE_DIR}/third_party)
#set(MILVUS_THIRD_PARTY_BUILD ${CMAKE_CURRENT_SOURCE_DIR}/third_party/build)
add_compile_definitions(PROFILER=${PROFILER})
include_directories(${VECWISE_ENGINE_INCLUDE})
include_directories(${VECWISE_ENGINE_SRC})
#include_directories(${VECWISE_THIRD_PARTY_BUILD}/include)
include_directories(${MILVUS_ENGINE_INCLUDE})
include_directories(${MILVUS_ENGINE_SRC})
#include_directories(${MILVUS_THIRD_PARTY_BUILD}/include)
# Add this directory's build tree to the linker search path. The variable name
# was previously misspelled (CMAKE_CURRRENT_...), which expanded to nothing.
link_directories(${CMAKE_CURRENT_BINARY_DIR})
#link_directories(${VECWISE_THIRD_PARTY_BUILD}/lib)
#link_directories(${VECWISE_THIRD_PARTY_BUILD}/lib64)
#link_directories(${MILVUS_THIRD_PARTY_BUILD}/lib)
#link_directories(${MILVUS_THIRD_PARTY_BUILD}/lib64)
#execute_process(COMMAND bash build.sh
# WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/third_party)
add_subdirectory(src)
if (BUILD_COVERAGE STREQUAL "ON")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fprofile-arcs -ftest-coverage")
endif()
if (BUILD_UNIT_TEST)
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/unittest)
endif(BUILD_UNIT_TEST)
@ -151,7 +149,7 @@ install(FILES
scripts)
install(FILES
conf/server_config.yaml
conf/vecwise_engine_log.conf
conf/log_config.conf
DESTINATION
conf)

View File

@ -8,14 +8,14 @@
sudo apt-get install gfortran flex bison
#### Step 2: build(output to cmake_build folder)
cmake_build/src/vecwise_server is the server
cmake_build/src/milvus_server is the server
cmake_build/src/libvecwise_engine.a is the static library
cmake_build/src/libmilvus_engine.a is the static library
cd [sourcecode path]/cpp
./build.sh -t Debug
./build.sh -t Release
./build.sh -g # Build GPU version
./build.sh -l -t Release # Build license version(only available for Release)
If you encounter the following error when building:
`protocol https not supported or disabled in libcurl`

View File

@ -2,11 +2,12 @@
BUILD_TYPE="Debug"
BUILD_UNITTEST="off"
BUILD_GPU="OFF"
INSTALL_PREFIX=$(pwd)/megasearch
LICENSE_CHECK="OFF"
INSTALL_PREFIX=$(pwd)/milvus
MAKE_CLEAN="OFF"
BUILD_COVERAGE="OFF"
while getopts "p:t:uhgr" arg
while getopts "p:t:uhlrc" arg
do
case $arg in
t)
@ -19,8 +20,8 @@ do
p)
INSTALL_PREFIX=$OPTARG
;;
g)
BUILD_GPU="ON"
l)
LICENSE_CHECK="ON"
;;
r)
if [[ -d cmake_build ]]; then
@ -28,6 +29,9 @@ do
MAKE_CLEAN="ON"
fi
;;
c)
BUILD_COVERAGE="ON"
;;
h) # help
echo "
@ -35,11 +39,12 @@ parameter:
-t: build type
-u: building unit test options
-p: install prefix
-g: build GPU version
-l: build license version
-r: remove previous build directory
-c: code coverage
usage:
./build.sh -t \${BUILD_TYPE} [-u] [-h] [-g] [-r]
./build.sh -t \${BUILD_TYPE} [-u] [-h] [-l] [-r] [-c]
"
exit 0
;;
@ -59,12 +64,13 @@ cd cmake_build
CUDA_COMPILER=/usr/local/cuda/bin/nvcc
if [[ ${MAKE_CLEAN} = "ON" ]]; then
if [[ ${MAKE_CLEAN} == "ON" ]]; then
CMAKE_CMD="cmake -DBUILD_UNIT_TEST=${BUILD_UNITTEST} \
-DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX}
-DCMAKE_BUILD_TYPE=${BUILD_TYPE} \
-DCMAKE_CUDA_COMPILER=${CUDA_COMPILER} \
-DGPU_VERSION=${BUILD_GPU} \
-DCMAKE_LICENSE_CHECK=${LICENSE_CHECK} \
-DBUILD_COVERAGE=${BUILD_COVERAGE} \
$@ ../"
echo ${CMAKE_CMD}
@ -75,8 +81,13 @@ fi
make -j 4 || exit 1
if [[ ${BUILD_TYPE} != "Debug" ]]; then
strip src/vecwise_server
strip src/milvus_server
fi
make install
make install || exit 1
if [[ ${BUILD_COVERAGE} == "ON" ]]; then
cd -
bash `pwd`/coverage.sh
cd -
fi

View File

@ -1,12 +1,12 @@
macro(set_option_category name)
set(MEGASEARCH_OPTION_CATEGORY ${name})
list(APPEND "MEGASEARCH_OPTION_CATEGORIES" ${name})
set(MILVUS_OPTION_CATEGORY ${name})
list(APPEND "MILVUS_OPTION_CATEGORIES" ${name})
endmacro()
macro(define_option name description default)
option(${name} ${description} ${default})
list(APPEND "MEGASEARCH_${MEGASEARCH_OPTION_CATEGORY}_OPTION_NAMES" ${name})
list(APPEND "MILVUS_${MILVUS_OPTION_CATEGORY}_OPTION_NAMES" ${name})
set("${name}_OPTION_DESCRIPTION" ${description})
set("${name}_OPTION_DEFAULT" ${default})
set("${name}_OPTION_TYPE" "bool")
@ -28,7 +28,7 @@ endfunction()
macro(define_option_string name description default)
set(${name} ${default} CACHE STRING ${description})
list(APPEND "MEGASEARCH_${MEGASEARCH_OPTION_CATEGORY}_OPTION_NAMES" ${name})
list(APPEND "MILVUS_${MILVUS_OPTION_CATEGORY}_OPTION_NAMES" ${name})
set("${name}_OPTION_DESCRIPTION" ${description})
set("${name}_OPTION_DEFAULT" "\"${default}\"")
set("${name}_OPTION_TYPE" "string")
@ -43,69 +43,71 @@ endmacro()
#----------------------------------------------------------------------
set_option_category("Thirdparty")
set(MEGASEARCH_DEPENDENCY_SOURCE_DEFAULT "AUTO")
set(MILVUS_DEPENDENCY_SOURCE_DEFAULT "AUTO")
define_option_string(MEGASEARCH_DEPENDENCY_SOURCE
"Method to use for acquiring MEGASEARCH's build dependencies"
"${MEGASEARCH_DEPENDENCY_SOURCE_DEFAULT}"
define_option_string(MILVUS_DEPENDENCY_SOURCE
"Method to use for acquiring MILVUS's build dependencies"
"${MILVUS_DEPENDENCY_SOURCE_DEFAULT}"
"AUTO"
"BUNDLED"
"SYSTEM")
define_option(MEGASEARCH_VERBOSE_THIRDPARTY_BUILD
define_option(MILVUS_VERBOSE_THIRDPARTY_BUILD
"Show output from ExternalProjects rather than just logging to files" ON)
define_option(MEGASEARCH_WITH_ARROW "Build with ARROW" OFF)
define_option(MILVUS_WITH_ARROW "Build with ARROW" OFF)
define_option(MEGASEARCH_BOOST_USE_SHARED "Rely on boost shared libraries where relevant" OFF)
define_option(MILVUS_BOOST_USE_SHARED "Rely on boost shared libraries where relevant" OFF)
define_option(MEGASEARCH_BOOST_VENDORED "Use vendored Boost instead of existing Boost. \
define_option(MILVUS_BOOST_VENDORED "Use vendored Boost instead of existing Boost. \
Note that this requires linking Boost statically" ON)
define_option(MEGASEARCH_BOOST_HEADER_ONLY "Use only BOOST headers" OFF)
define_option(MILVUS_BOOST_HEADER_ONLY "Use only BOOST headers" OFF)
define_option(MEGASEARCH_WITH_BZ2 "Build with BZ2 compression" ON)
define_option(MILVUS_WITH_BZ2 "Build with BZ2 compression" ON)
define_option(MEGASEARCH_WITH_EASYLOGGINGPP "Build with Easylogging++ library" ON)
define_option(MILVUS_WITH_EASYLOGGINGPP "Build with Easylogging++ library" ON)
define_option(MEGASEARCH_WITH_FAISS "Build with FAISS library" ON)
define_option(MILVUS_WITH_FAISS "Build with FAISS library" ON)
define_option(MEGASEARCH_WITH_FAISS_GPU_VERSION "Build with FAISS GPU version" ON)
define_option(MILVUS_WITH_FAISS_GPU_VERSION "Build with FAISS GPU version" ON)
#define_option_string(MEGASEARCH_FAISS_GPU_ARCH "Specifying which GPU architectures to build against"
#define_option_string(MILVUS_FAISS_GPU_ARCH "Specifying which GPU architectures to build against"
# "-gencode=arch=compute_35,code=compute_35 -gencode=arch=compute_52,code=compute_52 -gencode=arch=compute_60,code=compute_60 -gencode=arch=compute_61,code=compute_61")
define_option(MEGASEARCH_WITH_LAPACK "Build with LAPACK library" ON)
define_option(MILVUS_WITH_LAPACK "Build with LAPACK library" ON)
define_option(MEGASEARCH_WITH_LZ4 "Build with lz4 compression" ON)
define_option(MILVUS_WITH_LZ4 "Build with lz4 compression" ON)
define_option(MEGASEARCH_WITH_JSONCONS "Build with JSONCONS" OFF)
define_option(MILVUS_WITH_JSONCONS "Build with JSONCONS" OFF)
define_option(MEGASEARCH_WITH_OPENBLAS "Build with OpenBLAS library" ON)
define_option(MILVUS_WITH_OPENBLAS "Build with OpenBLAS library" ON)
define_option(MEGASEARCH_WITH_PROMETHEUS "Build with PROMETHEUS library" ON)
define_option(MILVUS_WITH_PROMETHEUS "Build with PROMETHEUS library" ON)
define_option(MEGASEARCH_WITH_ROCKSDB "Build with RocksDB library" OFF)
define_option(MILVUS_WITH_ROCKSDB "Build with RocksDB library" OFF)
define_option(MEGASEARCH_WITH_SNAPPY "Build with Snappy compression" ON)
define_option(MILVUS_WITH_SNAPPY "Build with Snappy compression" ON)
define_option(MEGASEARCH_WITH_SQLITE "Build with SQLite library" ON)
define_option(MILVUS_WITH_SQLITE "Build with SQLite library" ON)
define_option(MEGASEARCH_WITH_SQLITE_ORM "Build with SQLite ORM library" ON)
define_option(MILVUS_WITH_SQLITE_ORM "Build with SQLite ORM library" ON)
define_option(MEGASEARCH_WITH_THRIFT "Build with Apache Thrift library" ON)
define_option(MILVUS_WITH_THRIFT "Build with Apache Thrift library" ON)
define_option(MEGASEARCH_WITH_YAMLCPP "Build with yaml-cpp library" ON)
define_option(MILVUS_WITH_YAMLCPP "Build with yaml-cpp library" ON)
define_option(MEGASEARCH_WITH_ZLIB "Build with zlib compression" ON)
define_option(MILVUS_WITH_ZLIB "Build with zlib compression" ON)
if(CMAKE_VERSION VERSION_LESS 3.7)
set(MEGASEARCH_WITH_ZSTD_DEFAULT OFF)
set(MILVUS_WITH_ZSTD_DEFAULT OFF)
else()
# ExternalProject_Add(SOURCE_SUBDIR) is available since CMake 3.7.
set(MEGASEARCH_WITH_ZSTD_DEFAULT ON)
set(MILVUS_WITH_ZSTD_DEFAULT ON)
endif()
define_option(MEGASEARCH_WITH_ZSTD "Build with zstd compression" ${MEGASEARCH_WITH_ZSTD_DEFAULT})
define_option(MILVUS_WITH_ZSTD "Build with zstd compression" ${MILVUS_WITH_ZSTD_DEFAULT})
define_option(MILVUS_WITH_AWS "Build with AWS SDK" ON)
#----------------------------------------------------------------------
if(MSVC)
@ -115,7 +117,7 @@ if(MSVC)
"Pass verbose linking options when linking libraries and executables"
OFF)
define_option(MEGASEARCH_USE_STATIC_CRT "Build MEGASEARCH with statically linked CRT" OFF)
define_option(MILVUS_USE_STATIC_CRT "Build MILVUS with statically linked CRT" OFF)
endif()
@ -123,15 +125,15 @@ endif()
set_option_category("Test and benchmark")
if (BUILD_UNIT_TEST)
define_option(MEGASEARCH_BUILD_TESTS "Build the MEGASEARCH googletest unit tests" ON)
define_option(MILVUS_BUILD_TESTS "Build the MILVUS googletest unit tests" ON)
else()
define_option(MEGASEARCH_BUILD_TESTS "Build the MEGASEARCH googletest unit tests" OFF)
define_option(MILVUS_BUILD_TESTS "Build the MILVUS googletest unit tests" OFF)
endif(BUILD_UNIT_TEST)
#----------------------------------------------------------------------
macro(config_summary)
message(STATUS "---------------------------------------------------------------------")
message(STATUS "MEGASEARCH version: ${MEGASEARCH_VERSION}")
message(STATUS "MILVUS version: ${MILVUS_VERSION}")
message(STATUS)
message(STATUS "Build configuration summary:")
@ -143,12 +145,12 @@ macro(config_summary)
STATUS " Compile commands: ${CMAKE_CURRENT_BINARY_DIR}/compile_commands.json")
endif()
foreach(category ${MEGASEARCH_OPTION_CATEGORIES})
foreach(category ${MILVUS_OPTION_CATEGORIES})
message(STATUS)
message(STATUS "${category} options:")
set(option_names ${MEGASEARCH_${category}_OPTION_NAMES})
set(option_names ${MILVUS_${category}_OPTION_NAMES})
set(max_value_length 0)
foreach(name ${option_names})

View File

@ -15,7 +15,7 @@
# specific language governing permissions and limitations
# under the License.
set(MEGASEARCH_THIRDPARTY_DEPENDENCIES
set(MILVUS_THIRDPARTY_DEPENDENCIES
ARROW
BOOST
@ -35,14 +35,15 @@ set(MEGASEARCH_THIRDPARTY_DEPENDENCIES
Thrift
yaml-cpp
ZLIB
ZSTD)
ZSTD
AWS)
message(STATUS "Using ${MEGASEARCH_DEPENDENCY_SOURCE} approach to find dependencies")
message(STATUS "Using ${MILVUS_DEPENDENCY_SOURCE} approach to find dependencies")
# For each dependency, set dependency source to global default, if unset
foreach(DEPENDENCY ${MEGASEARCH_THIRDPARTY_DEPENDENCIES})
foreach(DEPENDENCY ${MILVUS_THIRDPARTY_DEPENDENCIES})
if("${${DEPENDENCY}_SOURCE}" STREQUAL "")
set(${DEPENDENCY}_SOURCE ${MEGASEARCH_DEPENDENCY_SOURCE})
set(${DEPENDENCY}_SOURCE ${MILVUS_DEPENDENCY_SOURCE})
endif()
endforeach()
@ -83,6 +84,8 @@ macro(build_dependency DEPENDENCY_NAME)
build_zlib()
elseif("${DEPENDENCY_NAME}" STREQUAL "ZSTD")
build_zstd()
elseif("${DEPENDENCY_NAME}" STREQUAL "AWS")
build_aws()
else()
message(FATAL_ERROR "Unknown thirdparty dependency to build: ${DEPENDENCY_NAME}")
endif ()
@ -145,7 +148,7 @@ set(EP_COMMON_CMAKE_ARGS
-DCMAKE_CXX_FLAGS=${EP_CXX_FLAGS}
-DCMAKE_CXX_FLAGS_${UPPERCASE_BUILD_TYPE}=${EP_CXX_FLAGS})
if(NOT MEGASEARCH_VERBOSE_THIRDPARTY_BUILD)
if(NOT MILVUS_VERBOSE_THIRDPARTY_BUILD)
set(EP_LOG_OPTIONS LOG_CONFIGURE 1 LOG_BUILD 1 LOG_INSTALL 1 LOG_DOWNLOAD 1)
else()
set(EP_LOG_OPTIONS)
@ -158,10 +161,9 @@ if("${MAKE}" STREQUAL "")
endif()
endif()
set(MAKE_BUILD_ARGS "-j4")
set(MAKE_BUILD_ARGS "-j2")
## Using make -j in sub-make is fragile
## see discussion https://github.com/apache/MEGASEARCH/pull/2779
#if(${CMAKE_GENERATOR} MATCHES "Makefiles")
# set(MAKE_BUILD_ARGS "")
#else()
@ -180,7 +182,7 @@ find_package(Threads REQUIRED)
# offline builds
# Read toolchain versions from cpp/thirdparty/versions.txt
set(THIRDPARTY_DIR "${MEGASEARCH_SOURCE_DIR}/thirdparty")
set(THIRDPARTY_DIR "${MILVUS_SOURCE_DIR}/thirdparty")
file(STRINGS "${THIRDPARTY_DIR}/versions.txt" TOOLCHAIN_VERSIONS_TXT)
foreach(_VERSION_ENTRY ${TOOLCHAIN_VERSIONS_TXT})
# Exclude comments
@ -202,16 +204,16 @@ foreach(_VERSION_ENTRY ${TOOLCHAIN_VERSIONS_TXT})
set(${_LIB_NAME} "${_LIB_VERSION}")
endforeach()
if(DEFINED ENV{MEGASEARCH_ARROW_URL})
set(ARROW_SOURCE_URL "$ENV{MEGASEARCH_ARROW_URL}")
if(DEFINED ENV{MILVUS_ARROW_URL})
set(ARROW_SOURCE_URL "$ENV{MILVUS_ARROW_URL}")
else()
set(ARROW_SOURCE_URL
"https://github.com/youny626/arrow.git"
)
endif()
if(DEFINED ENV{MEGASEARCH_BOOST_URL})
set(BOOST_SOURCE_URL "$ENV{MEGASEARCH_BOOST_URL}")
if(DEFINED ENV{MILVUS_BOOST_URL})
set(BOOST_SOURCE_URL "$ENV{MILVUS_BOOST_URL}")
else()
string(REPLACE "." "_" BOOST_VERSION_UNDERSCORES ${BOOST_VERSION})
set(BOOST_SOURCE_URL
@ -219,58 +221,58 @@ else()
)
endif()
if(DEFINED ENV{MEGASEARCH_BZIP2_URL})
set(BZIP2_SOURCE_URL "$ENV{MEGASEARCH_BZIP2_URL}")
if(DEFINED ENV{MILVUS_BZIP2_URL})
set(BZIP2_SOURCE_URL "$ENV{MILVUS_BZIP2_URL}")
else()
set(BZIP2_SOURCE_URL "https://fossies.org/linux/misc/bzip2-${BZIP2_VERSION}.tar.gz")
endif()
if(DEFINED ENV{MEGASEARCH_EASYLOGGINGPP_URL})
set(EASYLOGGINGPP_SOURCE_URL "$ENV{MEGASEARCH_EASYLOGGINGPP_URL}")
if(DEFINED ENV{MILVUS_EASYLOGGINGPP_URL})
set(EASYLOGGINGPP_SOURCE_URL "$ENV{MILVUS_EASYLOGGINGPP_URL}")
else()
set(EASYLOGGINGPP_SOURCE_URL "https://github.com/zuhd-org/easyloggingpp/archive/${EASYLOGGINGPP_VERSION}.tar.gz")
endif()
if(DEFINED ENV{MEGASEARCH_FAISS_URL})
set(FAISS_SOURCE_URL "$ENV{MEGASEARCH_FAISS_URL}")
if(DEFINED ENV{MILVUS_FAISS_URL})
set(FAISS_SOURCE_URL "$ENV{MILVUS_FAISS_URL}")
else()
set(FAISS_SOURCE_URL "https://github.com/facebookresearch/faiss/archive/${FAISS_VERSION}.tar.gz")
endif()
if (DEFINED ENV{MEGASEARCH_GTEST_URL})
set(GTEST_SOURCE_URL "$ENV{MEGASEARCH_GTEST_URL}")
if (DEFINED ENV{MILVUS_GTEST_URL})
set(GTEST_SOURCE_URL "$ENV{MILVUS_GTEST_URL}")
else ()
set(GTEST_SOURCE_URL
"https://github.com/google/googletest/archive/release-${GTEST_VERSION}.tar.gz")
endif()
if (DEFINED ENV{MEGASEARCH_JSONCONS_URL})
set(JSONCONS_SOURCE_URL "$ENV{MEGASEARCH_JSONCONS_URL}")
if (DEFINED ENV{MILVUS_JSONCONS_URL})
set(JSONCONS_SOURCE_URL "$ENV{MILVUS_JSONCONS_URL}")
else ()
set(JSONCONS_SOURCE_URL
"https://github.com/danielaparker/jsoncons/archive/v${JSONCONS_VERSION}.tar.gz")
endif()
if(DEFINED ENV{MEGASEARCH_LAPACK_URL})
set(LAPACK_SOURCE_URL "$ENV{MEGASEARCH_LAPACK_URL}")
if(DEFINED ENV{MILVUS_LAPACK_URL})
set(LAPACK_SOURCE_URL "$ENV{MILVUS_LAPACK_URL}")
else()
set(LAPACK_SOURCE_URL "https://github.com/Reference-LAPACK/lapack/archive/${LAPACK_VERSION}.tar.gz")
endif()
if(DEFINED ENV{MEGASEARCH_LZ4_URL})
set(LZ4_SOURCE_URL "$ENV{MEGASEARCH_LZ4_URL}")
if(DEFINED ENV{MILVUS_LZ4_URL})
set(LZ4_SOURCE_URL "$ENV{MILVUS_LZ4_URL}")
else()
set(LZ4_SOURCE_URL "https://github.com/lz4/lz4/archive/${LZ4_VERSION}.tar.gz")
endif()
if (DEFINED ENV{MEGASEARCH_OPENBLAS_URL})
set(OPENBLAS_SOURCE_URL "$ENV{MEGASEARCH_OPENBLAS_URL}")
if (DEFINED ENV{MILVUS_OPENBLAS_URL})
set(OPENBLAS_SOURCE_URL "$ENV{MILVUS_OPENBLAS_URL}")
else ()
set(OPENBLAS_SOURCE_URL
"https://github.com/xianyi/OpenBLAS/archive/${OPENBLAS_VERSION}.tar.gz")
endif()
if (DEFINED ENV{MEGASEARCH_PROMETHEUS_URL})
if (DEFINED ENV{MILVUS_PROMETHEUS_URL})
# Read the override from the same environment variable that the enclosing
# guard tests; the previous name (PROMETHEUS_OPENBLAS_URL) was never checked.
set(PROMETHEUS_SOURCE_URL "$ENV{MILVUS_PROMETHEUS_URL}")
else ()
set(PROMETHEUS_SOURCE_URL
@ -278,59 +280,64 @@ else ()
https://github.com/jupp0r/prometheus-cpp.git)
endif()
if (DEFINED ENV{MEGASEARCH_ROCKSDB_URL})
set(ROCKSDB_SOURCE_URL "$ENV{MEGASEARCH_ROCKSDB_URL}")
if (DEFINED ENV{MILVUS_ROCKSDB_URL})
set(ROCKSDB_SOURCE_URL "$ENV{MILVUS_ROCKSDB_URL}")
else ()
set(ROCKSDB_SOURCE_URL
"https://github.com/facebook/rocksdb/archive/${ROCKSDB_VERSION}.tar.gz")
endif()
if(DEFINED ENV{MEGASEARCH_SNAPPY_URL})
set(SNAPPY_SOURCE_URL "$ENV{MEGASEARCH_SNAPPY_URL}")
if(DEFINED ENV{MILVUS_SNAPPY_URL})
set(SNAPPY_SOURCE_URL "$ENV{MILVUS_SNAPPY_URL}")
else()
set(SNAPPY_SOURCE_URL
"https://github.com/google/snappy/archive/${SNAPPY_VERSION}.tar.gz")
endif()
if(DEFINED ENV{MEGASEARCH_SQLITE_URL})
set(SQLITE_SOURCE_URL "$ENV{MEGASEARCH_SQLITE_URL}")
if(DEFINED ENV{MILVUS_SQLITE_URL})
set(SQLITE_SOURCE_URL "$ENV{MILVUS_SQLITE_URL}")
else()
set(SQLITE_SOURCE_URL
"https://www.sqlite.org/2019/sqlite-autoconf-${SQLITE_VERSION}.tar.gz")
endif()
if(DEFINED ENV{MEGASEARCH_SQLITE_ORM_URL})
set(SQLITE_ORM_SOURCE_URL "$ENV{MEGASEARCH_SQLITE_ORM_URL}")
if(DEFINED ENV{MILVUS_SQLITE_ORM_URL})
set(SQLITE_ORM_SOURCE_URL "$ENV{MILVUS_SQLITE_ORM_URL}")
else()
set(SQLITE_ORM_SOURCE_URL
"https://github.com/fnc12/sqlite_orm/archive/${SQLITE_ORM_VERSION}.zip")
endif()
if(DEFINED ENV{MEGASEARCH_THRIFT_URL})
set(THRIFT_SOURCE_URL "$ENV{MEGASEARCH_THRIFT_URL}")
if(DEFINED ENV{MILVUS_THRIFT_URL})
set(THRIFT_SOURCE_URL "$ENV{MILVUS_THRIFT_URL}")
else()
set(THRIFT_SOURCE_URL
"https://github.com/apache/thrift/archive/${THRIFT_VERSION}.tar.gz")
endif()
if(DEFINED ENV{MEGASEARCH_YAMLCPP_URL})
set(YAMLCPP_SOURCE_URL "$ENV{MEGASEARCH_YAMLCPP_URL}")
if(DEFINED ENV{MILVUS_YAMLCPP_URL})
set(YAMLCPP_SOURCE_URL "$ENV{MILVUS_YAMLCPP_URL}")
else()
set(YAMLCPP_SOURCE_URL "https://github.com/jbeder/yaml-cpp/archive/yaml-cpp-${YAMLCPP_VERSION}.tar.gz")
endif()
if(DEFINED ENV{MEGASEARCH_ZLIB_URL})
set(ZLIB_SOURCE_URL "$ENV{MEGASEARCH_ZLIB_URL}")
if(DEFINED ENV{MILVUS_ZLIB_URL})
set(ZLIB_SOURCE_URL "$ENV{MILVUS_ZLIB_URL}")
else()
set(ZLIB_SOURCE_URL "https://github.com/madler/zlib/archive/${ZLIB_VERSION}.tar.gz")
endif()
if(DEFINED ENV{MEGASEARCH_ZSTD_URL})
set(ZSTD_SOURCE_URL "$ENV{MEGASEARCH_ZSTD_URL}")
if(DEFINED ENV{MILVUS_ZSTD_URL})
set(ZSTD_SOURCE_URL "$ENV{MILVUS_ZSTD_URL}")
else()
set(ZSTD_SOURCE_URL "https://github.com/facebook/zstd/archive/${ZSTD_VERSION}.tar.gz")
endif()
if(DEFINED ENV{MILVUS_AWS_URL})
set(AWS_SOURCE_URL "$ENV{MILVUS_AWS_URL}")
else()
set(AWS_SOURCE_URL "https://github.com/aws/aws-sdk-cpp/archive/${AWS_VERSION}.tar.gz")
endif()
# ----------------------------------------------------------------------
# ARROW
@ -410,7 +417,7 @@ macro(build_arrow)
endmacro()
if(MEGASEARCH_WITH_ARROW)
if(MILVUS_WITH_ARROW)
resolve_dependency(ARROW)
@ -422,7 +429,7 @@ endif()
# Add Boost dependencies (code adapted from Apache Kudu (incubating))
set(Boost_USE_MULTITHREADED ON)
if(MSVC AND MEGASEARCH_USE_STATIC_CRT)
if(MSVC AND MILVUS_USE_STATIC_CRT)
set(Boost_USE_STATIC_RUNTIME ON)
endif()
set(Boost_ADDITIONAL_VERSIONS
@ -449,7 +456,7 @@ set(Boost_ADDITIONAL_VERSIONS
"1.60.0"
"1.60")
if(MEGASEARCH_BOOST_VENDORED)
if(MILVUS_BOOST_VENDORED)
set(BOOST_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/boost_ep-prefix/src/boost_ep")
set(BOOST_LIB_DIR "${BOOST_PREFIX}/stage/lib")
set(BOOST_BUILD_LINK "static")
@ -466,7 +473,7 @@ if(MEGASEARCH_BOOST_VENDORED)
set(BOOST_FILESYSTEM_LIBRARY boost_filesystem_static)
set(BOOST_SERIALIZATION_LIBRARY boost_serialization_static)
if(MEGASEARCH_BOOST_HEADER_ONLY)
if(MILVUS_BOOST_HEADER_ONLY)
set(BOOST_BUILD_PRODUCTS)
set(BOOST_CONFIGURE_COMMAND "")
set(BOOST_BUILD_COMMAND "")
@ -492,7 +499,7 @@ if(MEGASEARCH_BOOST_VENDORED)
add_thirdparty_lib(boost_serialization STATIC_LIB "${BOOST_STATIC_SERIALIZATION_LIBRARY}")
set(MEGASEARCH_BOOST_LIBS ${BOOST_SYSTEM_LIBRARY} ${BOOST_FILESYSTEM_LIBRARY} ${BOOST_STATIC_SERIALIZATION_LIBRARY})
set(MILVUS_BOOST_LIBS ${BOOST_SYSTEM_LIBRARY} ${BOOST_FILESYSTEM_LIBRARY} ${BOOST_STATIC_SERIALIZATION_LIBRARY})
endif()
externalproject_add(boost_ep
URL
@ -526,7 +533,7 @@ else()
# set(Boost_NO_SYSTEM_PATHS ON)
# endif()
if(MEGASEARCH_BOOST_USE_SHARED)
if(MILVUS_BOOST_USE_SHARED)
# Find shared Boost libraries.
set(Boost_USE_STATIC_LIBS OFF)
set(BUILD_SHARED_LIBS_KEEP ${BUILD_SHARED_LIBS})
@ -537,14 +544,14 @@ else()
add_definitions(-DBOOST_ALL_DYN_LINK)
endif()
if(MEGASEARCH_BOOST_HEADER_ONLY)
if(MILVUS_BOOST_HEADER_ONLY)
find_package(Boost REQUIRED)
else()
find_package(Boost COMPONENTS serialization system filesystem REQUIRED)
set(BOOST_SYSTEM_LIBRARY Boost::system)
set(BOOST_FILESYSTEM_LIBRARY Boost::filesystem)
set(BOOST_SERIALIZATION_LIBRARY Boost::serialization)
set(MEGASEARCH_BOOST_LIBS ${BOOST_SYSTEM_LIBRARY} ${BOOST_FILESYSTEM_LIBRARY})
set(MILVUS_BOOST_LIBS ${BOOST_SYSTEM_LIBRARY} ${BOOST_FILESYSTEM_LIBRARY})
endif()
set(BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS_KEEP})
unset(BUILD_SHARED_LIBS_KEEP)
@ -552,14 +559,14 @@ else()
# Find static boost headers and libs
# TODO Differentiate here between release and debug builds
set(Boost_USE_STATIC_LIBS ON)
if(MEGASEARCH_BOOST_HEADER_ONLY)
if(MILVUS_BOOST_HEADER_ONLY)
find_package(Boost REQUIRED)
else()
find_package(Boost COMPONENTS serialization system filesystem REQUIRED)
set(BOOST_SYSTEM_LIBRARY Boost::system)
set(BOOST_FILESYSTEM_LIBRARY Boost::filesystem)
set(BOOST_SERIALIZATION_LIBRARY Boost::serialization)
set(MEGASEARCH_BOOST_LIBS ${BOOST_SYSTEM_LIBRARY} ${BOOST_FILESYSTEM_LIBRARY})
set(MILVUS_BOOST_LIBS ${BOOST_SYSTEM_LIBRARY} ${BOOST_FILESYSTEM_LIBRARY})
endif()
endif()
endif()
@ -612,7 +619,7 @@ macro(build_bzip2)
add_dependencies(bzip2 bzip2_ep)
endmacro()
if(MEGASEARCH_WITH_BZ2)
if(MILVUS_WITH_BZ2)
resolve_dependency(BZip2)
if(NOT TARGET bzip2)
@ -664,7 +671,7 @@ macro(build_easyloggingpp)
add_dependencies(easyloggingpp easyloggingpp_ep)
endmacro()
if(MEGASEARCH_WITH_EASYLOGGINGPP)
if(MILVUS_WITH_EASYLOGGINGPP)
resolve_dependency(Easylogging++)
get_target_property(EASYLOGGINGPP_INCLUDE_DIR easyloggingpp INTERFACE_INCLUDE_DIRECTORIES)
@ -710,7 +717,7 @@ macro(build_openblas)
add_dependencies(openblas openblas_ep)
endmacro()
#if(MEGASEARCH_WITH_OPENBLAS)
#if(MILVUS_WITH_OPENBLAS)
# resolve_dependency(OpenBLAS)
#
# get_target_property(OPENBLAS_INCLUDE_DIR openblas INTERFACE_INCLUDE_DIRECTORIES)
@ -754,7 +761,7 @@ macro(build_lapack)
add_dependencies(lapack lapack_ep)
endmacro()
#if(MEGASEARCH_WITH_LAPACK)
#if(MILVUS_WITH_LAPACK)
# resolve_dependency(LAPACK)
#
# get_target_property(LAPACK_INCLUDE_DIR lapack INTERFACE_INCLUDE_DIRECTORIES)
@ -791,7 +798,7 @@ macro(build_faiss)
# endif()
# set(FAISS_DEPENDENCIES ${FAISS_DEPENDENCIES} ${OPENBLAS_LIBRARY})
if(${MEGASEARCH_WITH_FAISS_GPU_VERSION} STREQUAL "ON")
if(${MILVUS_WITH_FAISS_GPU_VERSION} STREQUAL "ON")
set(FAISS_CONFIGURE_ARGS ${FAISS_CONFIGURE_ARGS}
"--with-cuda=${CUDA_TOOLKIT_ROOT_DIR}"
# "with_cuda_arch=\"-gencode=arch=compute_35,code=compute_35 \\
@ -821,14 +828,17 @@ macro(build_faiss)
# BUILD_COMMAND
# ${MAKE} ${MAKE_BUILD_ARGS}
BUILD_COMMAND
${MAKE}
${MAKE_BUILD_ARGS} all
${MAKE} ${MAKE_BUILD_ARGS} all
COMMAND
cd gpu && make ${MAKE_BUILD_ARGS}
BUILD_IN_SOURCE
1
# INSTALL_DIR
# ${FAISS_PREFIX}
# INSTALL_COMMAND
# ""
INSTALL_COMMAND
${MAKE} install
COMMAND
# -f/-n: replace a link left by a previous run instead of failing (or
# descending into it) when the install step is executed again.
ln -sfn faiss_ep ../faiss
BUILD_BYPRODUCTS
${FAISS_STATIC_LIB})
# DEPENDS
@ -852,7 +862,7 @@ macro(build_faiss)
endmacro()
if(MEGASEARCH_WITH_FAISS)
if(MILVUS_WITH_FAISS)
resolve_dependency(OpenBLAS)
get_target_property(OPENBLAS_INCLUDE_DIR openblas INTERFACE_INCLUDE_DIRECTORIES)
@ -867,7 +877,10 @@ if(MEGASEARCH_WITH_FAISS)
resolve_dependency(FAISS)
get_target_property(FAISS_INCLUDE_DIR faiss INTERFACE_INCLUDE_DIRECTORIES)
include_directories(SYSTEM "${FAISS_INCLUDE_DIR}")
include_directories(SYSTEM "${CMAKE_CURRENT_BINARY_DIR}/faiss_ep-prefix/src/")
# Linker search paths for the FAISS build tree (root, lib/ and gpu/).
# link_directories() has no SYSTEM keyword (only AFTER/BEFORE); passing it
# added a bogus directory literally named "SYSTEM" to the search path.
link_directories(${FAISS_PREFIX}/)
link_directories(${FAISS_PREFIX}/lib/)
link_directories(${FAISS_PREFIX}/gpu/)
endif()
# ----------------------------------------------------------------------
@ -944,7 +957,7 @@ macro(build_gtest)
endmacro()
if (MEGASEARCH_BUILD_TESTS)
if (MILVUS_BUILD_TESTS)
#message(STATUS "Resolving gtest dependency")
resolve_dependency(GTest)
@ -976,7 +989,7 @@ macro(build_jsoncons)
endif ()
endmacro()
if(MEGASEARCH_WITH_JSONCONS)
if(MILVUS_WITH_JSONCONS)
resolve_dependency(JSONCONS)
include_directories(SYSTEM "${JSONCONS_INCLUDE_DIR}")
endif()
@ -990,7 +1003,7 @@ macro(build_lz4)
set(LZ4_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/lz4_ep-prefix/")
if(MSVC)
if(MEGASEARCH_USE_STATIC_CRT)
if(MILVUS_USE_STATIC_CRT)
if(${UPPERCASE_BUILD_TYPE} STREQUAL "DEBUG")
set(LZ4_RUNTIME_LIBRARY_LINKAGE "/p:RuntimeLibrary=MultiThreadedDebug")
else()
@ -1046,7 +1059,7 @@ macro(build_lz4)
add_dependencies(lz4 lz4_ep)
endmacro()
if(MEGASEARCH_WITH_LZ4)
if(MILVUS_WITH_LZ4)
resolve_dependency(Lz4)
# TODO: Don't use global includes but rather target_include_directories
@ -1129,7 +1142,7 @@ macro(build_prometheus)
add_dependencies(prometheus-cpp-core prometheus_ep)
endmacro()
if(MEGASEARCH_WITH_PROMETHEUS)
if(MILVUS_WITH_PROMETHEUS)
resolve_dependency(Prometheus)
@ -1191,7 +1204,7 @@ macro(build_rocksdb)
add_dependencies(rocksdb rocksdb_ep)
endmacro()
if(MEGASEARCH_WITH_ROCKSDB)
if(MILVUS_WITH_ROCKSDB)
resolve_dependency(RocksDB)
@ -1244,7 +1257,7 @@ macro(build_snappy)
add_dependencies(snappy snappy_ep)
endmacro()
if(MEGASEARCH_WITH_SNAPPY)
if(MILVUS_WITH_SNAPPY)
# if(Snappy_SOURCE STREQUAL "AUTO")
# # Normally *Config.cmake files reside in /usr/lib/cmake but Snappy
# # erroneously places them in ${CMAKE_ROOT}/Modules/
@ -1320,7 +1333,7 @@ macro(build_sqlite)
add_dependencies(sqlite sqlite_ep)
endmacro()
if(MEGASEARCH_WITH_SQLITE)
if(MILVUS_WITH_SQLITE)
resolve_dependency(SQLite)
include_directories(SYSTEM "${SQLITE_INCLUDE_DIR}")
link_directories(SYSTEM ${SQLITE_PREFIX}/lib/)
@ -1407,7 +1420,7 @@ macro(build_sqlite_orm)
# add_dependencies(sqlite_orm sqlite_orm_ep)
endmacro()
if(MEGASEARCH_WITH_SQLITE_ORM)
if(MILVUS_WITH_SQLITE_ORM)
resolve_dependency(SQLite_ORM)
# ExternalProject_Get_Property(sqlite_orm_ep source_dir)
# set(SQLITE_ORM_INCLUDE_DIR ${source_dir}/sqlite_orm_ep)
@ -1427,17 +1440,19 @@ macro(build_thrift)
${EP_COMMON_CMAKE_ARGS}
"-DCMAKE_INSTALL_PREFIX=${THRIFT_PREFIX}"
"-DCMAKE_INSTALL_RPATH=${THRIFT_PREFIX}/lib"
-DBUILD_SHARED_LIBS=OFF
-DBUILD_TESTING=OFF
-DBUILD_EXAMPLES=OFF
-DBOOST_ROOT=${BOOST_PREFIX}
-DWITH_CPP=ON
-DWITH_STATIC_LIB=ON
-DBUILD_SHARED_LIBS=OFF
-DBUILD_TESTING=OFF
-DBUILD_EXAMPLES=OFF
-DBUILD_TUTORIALS=OFF
-DWITH_QT4=OFF
-DWITH_QT5=OFF
-DWITH_C_GLIB=OFF
-DWITH_JAVA=OFF
-DWITH_PYTHON=OFF
-DWITH_HASKELL=OFF
-DWITH_CPP=ON
-DWITH_STATIC_LIB=ON
-DWITH_LIBEVENT=OFF
-DCMAKE_BUILD_TYPE=Release)
@ -1451,7 +1466,7 @@ macro(build_thrift)
set(THRIFT_STATIC_LIB_NAME "${CMAKE_STATIC_LIBRARY_PREFIX}thrift")
if(MSVC)
if(MEGASEARCH_USE_STATIC_CRT)
if(MILVUS_USE_STATIC_CRT)
set(THRIFT_STATIC_LIB_NAME "${THRIFT_STATIC_LIB_NAME}")
set(THRIFT_CMAKE_ARGS ${THRIFT_CMAKE_ARGS} "-DWITH_MT=ON")
else()
@ -1537,6 +1552,8 @@ macro(build_thrift)
${MAKE_BUILD_ARGS}
CMAKE_ARGS
${THRIFT_CMAKE_ARGS}
INSTALL_COMMAND
${MAKE} install
DEPENDS
${THRIFT_DEPENDENCIES}
${EP_LOG_OPTIONS})
@ -1550,12 +1567,15 @@ macro(build_thrift)
add_dependencies(thrift thrift_ep)
endmacro()
if(MEGASEARCH_WITH_THRIFT)
if(MILVUS_WITH_THRIFT)
resolve_dependency(Thrift)
# TODO: Don't use global includes but rather target_include_directories
# MESSAGE(STATUS ${THRIFT_PREFIX}/lib/)
link_directories(SYSTEM ${THRIFT_PREFIX}/lib/)
link_directories(SYSTEM ${CMAKE_CURRENT_BINARY_DIR}/thrift_ep-prefix/src/thrift_ep-build/lib)
include_directories(SYSTEM ${THRIFT_INCLUDE_DIR})
include_directories(SYSTEM ${THRIFT_PREFIX}/lib/cpp/src)
include_directories(SYSTEM ${CMAKE_CURRENT_BINARY_DIR}/thrift_ep-prefix/src/thrift_ep-build)
endif()
# ----------------------------------------------------------------------
@ -1594,7 +1614,7 @@ macro(build_yamlcpp)
add_dependencies(yaml-cpp yaml-cpp_ep)
endmacro()
if(MEGASEARCH_WITH_YAMLCPP)
if(MILVUS_WITH_YAMLCPP)
resolve_dependency(yaml-cpp)
# TODO: Don't use global includes but rather target_include_directories
@ -1644,7 +1664,7 @@ macro(build_zlib)
add_dependencies(zlib zlib_ep)
endmacro()
if(MEGASEARCH_WITH_ZLIB)
if(MILVUS_WITH_ZLIB)
resolve_dependency(ZLIB)
# TODO: Don't use global includes but rather target_include_directories
@ -1671,7 +1691,7 @@ macro(build_zstd)
if(MSVC)
set(ZSTD_STATIC_LIB "${ZSTD_PREFIX}/lib/zstd_static.lib")
if(MEGASEARCH_USE_STATIC_CRT)
if(MILVUS_USE_STATIC_CRT)
set(ZSTD_CMAKE_ARGS ${ZSTD_CMAKE_ARGS} "-DZSTD_USE_STATIC_RUNTIME=on")
endif()
else()
@ -1716,7 +1736,7 @@ macro(build_zstd)
add_dependencies(zstd zstd_ep)
endmacro()
if(MEGASEARCH_WITH_ZSTD)
if(MILVUS_WITH_ZSTD)
resolve_dependency(ZSTD)
# TODO: Don't use global includes but rather target_include_directories
@ -1724,3 +1744,87 @@ if(MEGASEARCH_WITH_ZSTD)
link_directories(SYSTEM ${ZSTD_PREFIX}/lib)
include_directories(SYSTEM ${ZSTD_INCLUDE_DIR})
endif()
# ----------------------------------------------------------------------
# aws
# build_aws()
#
# Builds the S3 component of the AWS C++ SDK from ${AWS_SOURCE_URL} as the
# ExternalProject `aws_ep` and exposes the resulting static archives as the
# imported targets `aws-cpp-sdk-s3` and `aws-cpp-sdk-core`.
#
# This stays a macro (not a function) because it defines AWS_PREFIX and the
# AWS_*_STATIC_LIB variables in the caller's scope.
macro(build_aws)
  message(STATUS "Building aws-${AWS_VERSION} from source")
  set(AWS_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/aws_ep-prefix/src/aws_ep")

  set(AWS_CMAKE_ARGS
      ${EP_COMMON_TOOLCHAIN}
      "-DCMAKE_INSTALL_PREFIX=${AWS_PREFIX}"
      -DCMAKE_BUILD_TYPE=Release
      # Pinned to "lib" (instead of ${CMAKE_INSTALL_LIBDIR}) so the archive
      # paths computed below are valid.
      -DCMAKE_INSTALL_LIBDIR=lib
      -DBUILD_ONLY=s3
      -DBUILD_SHARED_LIBS=off
      -DENABLE_TESTING=off
      -DENABLE_UNITY_BUILD=on
      -DNO_ENCRYPTION=off)

  set(AWS_CPP_SDK_CORE_STATIC_LIB
      "${AWS_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}aws-cpp-sdk-core${CMAKE_STATIC_LIBRARY_SUFFIX}")
  set(AWS_CPP_SDK_S3_STATIC_LIB
      "${AWS_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}aws-cpp-sdk-s3${CMAKE_STATIC_LIBRARY_SUFFIX}")

  # Archives the SDK targets link against. Named with the platform's static
  # library prefix/suffix, like the two SDK archives above, instead of a
  # hard-coded "lib*.a".
  set(AWS_TRANSITIVE_STATIC_LIBS)
  foreach(aws_dep_name aws-c-event-stream aws-checksums aws-c-common)
    list(APPEND AWS_TRANSITIVE_STATIC_LIBS
         "${AWS_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}${aws_dep_name}${CMAKE_STATIC_LIBRARY_SUFFIX}")
  endforeach()

  if(NOT MSVC)
    # Only pass our C flags on Unix as on MSVC it leads to a
    # "incompatible command-line options" error
    set(AWS_CMAKE_ARGS
        ${AWS_CMAKE_ARGS}
        -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
        -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
        -DCMAKE_C_FLAGS=${EP_C_FLAGS}
        -DCMAKE_CXX_FLAGS=${EP_CXX_FLAGS})
  endif()

  if(CMAKE_VERSION VERSION_LESS 3.7)
    message(FATAL_ERROR "Building AWS using ExternalProject requires at least CMake 3.7")
  endif()

  externalproject_add(aws_ep
                      ${EP_LOG_OPTIONS}
                      CMAKE_ARGS
                      ${AWS_CMAKE_ARGS}
                      BUILD_COMMAND
                      ${MAKE}
                      ${MAKE_BUILD_ARGS}
                      INSTALL_DIR
                      ${AWS_PREFIX}
                      URL
                      ${AWS_SOURCE_URL}
                      # Every archive that ends up on a link line is declared
                      # so generators such as Ninja know aws_ep produces it.
                      BUILD_BYPRODUCTS
                      "${AWS_CPP_SDK_S3_STATIC_LIB}"
                      "${AWS_CPP_SDK_CORE_STATIC_LIB}"
                      ${AWS_TRANSITIVE_STATIC_LIBS})

  # The include directory is created up front because it is referenced by the
  # imported targets below before aws_ep has installed anything into it.
  file(MAKE_DIRECTORY "${AWS_PREFIX}/include")

  add_library(aws-cpp-sdk-s3 STATIC IMPORTED)
  set_target_properties(aws-cpp-sdk-s3
                        PROPERTIES
                        IMPORTED_LOCATION "${AWS_CPP_SDK_S3_STATIC_LIB}"
                        INTERFACE_INCLUDE_DIRECTORIES "${AWS_PREFIX}/include"
                        INTERFACE_LINK_LIBRARIES "${AWS_TRANSITIVE_STATIC_LIBS}")

  add_library(aws-cpp-sdk-core STATIC IMPORTED)
  set_target_properties(aws-cpp-sdk-core
                        PROPERTIES
                        IMPORTED_LOCATION "${AWS_CPP_SDK_CORE_STATIC_LIB}"
                        INTERFACE_INCLUDE_DIRECTORIES "${AWS_PREFIX}/include"
                        INTERFACE_LINK_LIBRARIES "${AWS_TRANSITIVE_STATIC_LIBS}")

  # Make sure the external project is built before anything consumes the
  # imported archives.
  add_dependencies(aws-cpp-sdk-s3 aws_ep)
  add_dependencies(aws-cpp-sdk-core aws_ep)
endmacro()
# Resolve the AWS SDK and expose its headers and library directory to every
# target defined after this point.
if(MILVUS_WITH_AWS)
  resolve_dependency(AWS)

  # TODO: Don't use global includes but rather target_include_directories
  # link_directories() has no SYSTEM keyword (only AFTER/BEFORE); passing it
  # registered a bogus link directory literally named "SYSTEM".
  link_directories(${AWS_PREFIX}/lib)

  get_target_property(AWS_CPP_SDK_S3_INCLUDE_DIR aws-cpp-sdk-s3 INTERFACE_INCLUDE_DIRECTORIES)
  include_directories(SYSTEM ${AWS_CPP_SDK_S3_INCLUDE_DIR})

  get_target_property(AWS_CPP_SDK_CORE_INCLUDE_DIR aws-cpp-sdk-core INTERFACE_INCLUDE_DIRECTORIES)
  include_directories(SYSTEM ${AWS_CPP_SDK_CORE_INCLUDE_DIR})
endif()

27
cpp/conf/log_config.conf Normal file
View File

@ -0,0 +1,27 @@
* GLOBAL:
FORMAT = "%datetime | %level | %logger | %msg"
FILENAME = "/tmp/milvus/logs/milvus-%datetime{%H:%m}-global.log"
ENABLED = true
TO_FILE = true
TO_STANDARD_OUTPUT = false
SUBSECOND_PRECISION = 3
PERFORMANCE_TRACKING = false
MAX_LOG_FILE_SIZE = 2097152 ## Throw log files away after 2MB
* DEBUG:
FILENAME = "/tmp/milvus/logs/milvus-%datetime{%H:%m}-debug.log"
ENABLED = true
* WARNING:
FILENAME = "/tmp/milvus/logs/milvus-%datetime{%H:%m}-warning.log"
* TRACE:
FILENAME = "/tmp/milvus/logs/milvus-%datetime{%H:%m}-trace.log"
* VERBOSE:
FORMAT = "%datetime{%d/%M/%y} | %level-%vlevel | %msg"
TO_FILE = false
TO_STANDARD_OUTPUT = false
## Error logs
* ERROR:
ENABLED = false
FILENAME = "/tmp/milvus/logs/milvus-%datetime{%H:%m}-error.log"
* FATAL:
ENABLED = false
FILENAME = "/tmp/milvus/logs/milvus-%datetime{%H:%m}-fatal.log"

View File

@ -1,21 +1,21 @@
server_config:
address: 0.0.0.0
port: 33001
port: 19530
transfer_protocol: binary #optional: binary, compact, json
server_mode: thread_pool #optional: simple, thread_pool
gpu_index: 0 #which gpu to be used
mode: single #optional: single, cluster
db_config:
db_path: /tmp/milvus
db_backend_url: http://127.0.0.1
db_flush_interval: 5 #unit: second
idmapper_max_open_file: 128
index_building_threshold: 1024 #build index file when raw data file size larger than this value, unit: MB
metric_config:
is_startup: true # true is on, false is off
collector: prometheus # prometheus, now we only have prometheus
prometheus_config:
collect_type: pull # pull means prometheus pull the message from megasearch, push means megasearch push metric to push gateway
collect_type: pull # pull: Prometheus scrapes metrics from the server; push: the server pushes metrics to the push gateway
port: 8080
push_gateway_ip_address: 127.0.0.1
push_gateway_port: 9091
@ -24,5 +24,4 @@ license_config:
license_path: "/tmp/system.license"
cache_config:
cpu_cache_capacity: 16 # unit: GB
gpu_cache_capacity: 2 # unit: GB
cpu_cache_capacity: 16 # memory pool to hold index data, unit: GB

View File

@ -1,19 +0,0 @@
server_config:
address: 0.0.0.0
port: 33001
transfer_protocol: binary #optional: binary, compact, json
server_mode: thread_pool #optional: simple, thread_pool
gpu_index: 0 #which gpu to be used
db_config:
db_path: /tmp/milvus
db_backend_url: http://127.0.0.1
db_flush_interval: 5 #unit: second
idmapper_max_open_file: 128
license_config:
license_path: "/tmp/system.license"
cache_config:
cpu_cache_capacity: 16 # unit: GB
gpu_cache_capacity: 2 # unit: GB

View File

@ -1,27 +0,0 @@
* GLOBAL:
FORMAT = "%datetime | %level | %logger | %msg"
FILENAME = "/tmp/milvus/logs/vecwise_engine-%datetime{%H:%m}-global.log"
ENABLED = true
TO_FILE = true
TO_STANDARD_OUTPUT = true
SUBSECOND_PRECISION = 3
PERFORMANCE_TRACKING = false
MAX_LOG_FILE_SIZE = 2097152 ## Throw log files away after 2MB
* DEBUG:
FILENAME = "/tmp/milvus/logs/vecwise_engine-%datetime{%H:%m}-debug.log"
ENABLED = true
* WARNING:
FILENAME = "/tmp/milvus/logs/vecwise_engine-%datetime{%H:%m}-warning.log"
* TRACE:
FILENAME = "/tmp/milvus/logs/vecwise_engine-%datetime{%H:%m}-trace.log"
* VERBOSE:
FORMAT = "%datetime{%d/%M/%y} | %level-%vlevel | %msg"
TO_FILE = false
TO_STANDARD_OUTPUT = true
## Error logs
* ERROR:
ENABLED = false
FILENAME = "/tmp/milvus/logs/vecwise_engine-%datetime{%H:%m}-error.log"
* FATAL:
ENABLED = false
FILENAME = "/tmp/milvus/logs/vecwise_engine-%datetime{%H:%m}-fatal.log"

46
cpp/coverage.sh Executable file
View File

@ -0,0 +1,46 @@
#!/bin/bash
# Generate an lcov/genhtml coverage report for the unit tests.
#
# All paths are relative to the current working directory:
#   - coverage data is collected from ${DIR_GCNO}
#   - every file named *_test inside ${DIR_UNITTEST} is executed
#   - the HTML report is written to ${DIR_LCOV_OUTPUT}/

LCOV_CMD="lcov"
LCOV_GEN_CMD="genhtml"

FILE_INFO_BASE="base.info"
FILE_INFO_MILVUS="server.info"
FILE_INFO_OUTPUT="output.info"
FILE_INFO_OUTPUT_NEW="output_new.info"
DIR_LCOV_OUTPUT="lcov_out"

DIR_GCNO="cmake_build"
DIR_UNITTEST="milvus/bin"

# get baseline (initial capture, before any test has run)
if ! ${LCOV_CMD} -c -i -d "${DIR_GCNO}" -o "${FILE_INFO_BASE}"; then
    echo "gen baseline coverage run failed"
    exit 1
fi

# Iterate with a glob instead of parsing `ls`, so names containing
# whitespace are handled correctly.
for test_path in "${DIR_UNITTEST}"/*; do
    # When the directory is missing or empty the glob stays unexpanded.
    [ -e "${test_path}" ] || continue

    test_name="${test_path##*/}"
    echo "${test_name}"
    case "${test_name}" in
        *_test)
            # run unittest; a failing test is reported but does not abort
            # the coverage run
            if ! "./${test_path}"; then
                echo "${DIR_UNITTEST}/${test_name}" "run failed"
            fi
            ;;
    esac
done

# gen test coverage
${LCOV_CMD} -d "${DIR_GCNO}" -o "${FILE_INFO_MILVUS}" -c

# merge coverage
${LCOV_CMD} -a "${FILE_INFO_BASE}" -a "${FILE_INFO_MILVUS}" -o "${FILE_INFO_OUTPUT}"

# remove third party from tracefiles
${LCOV_CMD} -r "${FILE_INFO_OUTPUT}" -o "${FILE_INFO_OUTPUT_NEW}" \
    "/usr/*" \
    "*/boost/*" \
    "*/cmake_build/*_ep-prefix/*"

# gen html report
${LCOV_GEN_CMD} "${FILE_INFO_OUTPUT_NEW}" --output-directory "${DIR_LCOV_OUTPUT}/"

View File

@ -1,4 +1,4 @@
#!/bin/bash
../bin/vecwise_server -c ../conf/server_config.yaml -l ../conf/vecwise_engine_log.conf
../bin/milvus_server -c ../conf/server_config.yaml -l ../conf/log_config.conf

View File

@ -7,7 +7,7 @@ function kill_progress()
sleep 2
}
STATUS=$(kill_progress "vecwise_server" )
STATUS=$(kill_progress "milvus_server" )
if [[ ${STATUS} == "false" ]];then
echo "Milvus server closed abnormally!"

View File

@ -10,15 +10,17 @@ aux_source_directory(config config_files)
aux_source_directory(server server_files)
aux_source_directory(utils utils_files)
aux_source_directory(db db_files)
aux_source_directory(db/scheduler db_scheduler_files)
aux_source_directory(wrapper wrapper_files)
aux_source_directory(metrics metrics_files)
#set(metrics_files
# metrics/Metrics.cpp
# metrics/MetricBase.h
#)
aux_source_directory(db/scheduler scheduler_files)
aux_source_directory(db/scheduler/context scheduler_context_files)
aux_source_directory(db/scheduler/task scheduler_task_files)
set(db_scheduler_files
${scheduler_files}
${scheduler_context_files}
${scheduler_task_files}
)
set(license_check_files
license/LicenseLibrary.cpp
@ -31,16 +33,16 @@ set(license_generator_files
)
set(service_files
thrift/gen-cpp/MegasearchService.cpp
thrift/gen-cpp/megasearch_constants.cpp
thrift/gen-cpp/megasearch_types.cpp
thrift/gen-cpp/MilvusService.cpp
thrift/gen-cpp/milvus_constants.cpp
thrift/gen-cpp/milvus_types.cpp
metrics/SystemInfo.cpp
metrics/SystemInfo.h
server/MegasearchThreadPoolServer.cpp
server/MegasearchThreadPoolServer.h
server/ThreadPoolServer.cpp
server/ThreadPoolServer.h
)
set(vecwise_engine_files
set(engine_files
${CMAKE_CURRENT_SOURCE_DIR}/main.cpp
${cache_files}
${db_files}
@ -53,16 +55,20 @@ set(vecwise_engine_files
set(get_sys_info_files
license/GetSysInfo.cpp)
set(s3_client_files
storage/s3/S3ClientWrapper.cpp
storage/s3/S3ClientWrapper.h)
include_directories(/usr/include)
include_directories("${CUDA_TOOLKIT_ROOT_DIR}/include")
include_directories(thrift/gen-cpp)
set(third_party_libs
arrow
easyloggingpp
sqlite
thrift
yaml-cpp
libgpufaiss.a
faiss
lapack
openblas
@ -79,6 +85,9 @@ set(third_party_libs
zstd
${CUDA_TOOLKIT_ROOT_DIR}/lib64/stubs/libnvidia-ml.so
)
# Link arrow only when it was enabled. The option is tested under the
# MILVUS_ prefix used by the other MILVUS_WITH_* switches; the old
# MEGASEARCH_ name would never compare equal to "ON" after the rename.
if (MILVUS_WITH_ARROW STREQUAL "ON")
    list(APPEND third_party_libs arrow)
endif()
if (GPU_VERSION STREQUAL "ON")
link_directories("${CUDA_TOOLKIT_ROOT_DIR}/lib64")
@ -86,7 +95,6 @@ if (GPU_VERSION STREQUAL "ON")
pthread
libgomp.a
libgfortran.a
libquadmath.a
cudart
cublas
${CUDA_TOOLKIT_ROOT_DIR}/lib64/stubs/libnvidia-ml.so
@ -96,11 +104,18 @@ else()
pthread
libgomp.a
libgfortran.a
libquadmath.a
${CUDA_TOOLKIT_ROOT_DIR}/lib64/stubs/libnvidia-ml.so
)
endif ()
# libquadmath.a is linked on every processor except aarch64
# (presumably because it is not available there -- TODO confirm).
# The variable is referenced by name rather than expanded, so an empty
# CMAKE_SYSTEM_PROCESSOR cannot turn this into a malformed if() expression.
if (NOT CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64")
    list(APPEND engine_libs libquadmath.a)
endif ()
if (ENABLE_LICENSE STREQUAL "ON")
link_directories("${CUDA_TOOLKIT_ROOT_DIR}/lib64/stubs")
link_directories("${CUDA_TOOLKIT_ROOT_DIR}/lib64")
@ -114,14 +129,14 @@ endif ()
cuda_add_library(vecwise_engine STATIC ${vecwise_engine_files})
target_link_libraries(vecwise_engine ${engine_libs} ${third_party_libs})
cuda_add_library(milvus_engine STATIC ${engine_files})
target_link_libraries(milvus_engine ${engine_libs} ${third_party_libs})
add_library(metrics STATIC ${metrics_files})
if (ENABLE_LICENSE STREQUAL "ON")
add_library(vecwise_license STATIC ${license_check_files})
target_link_libraries(vecwise_license ${license_libs} ${third_party_libs})
add_library(license_check STATIC ${license_check_files})
target_link_libraries(license_check ${license_libs} ${third_party_libs})
endif ()
set(metrics_lib
@ -130,50 +145,40 @@ set(metrics_lib
prometheus-cpp-core
)
#add_library(vecwise_engine STATIC ${metrics_files} )
target_link_libraries(metrics ${metrics_lib})
set(server_libs
vecwise_engine
milvus_engine
pthread
dl
metrics
)
add_executable(vecwise_server
add_executable(milvus_server
${config_files}
${server_files}
${utils_files}
${service_files}
${metrics_files}
#${EASYLOGGINGPP_INCLUDE_DIR}/easylogging++.cc
)
if (ENABLE_LICENSE STREQUAL "ON")
target_link_libraries(vecwise_server ${server_libs} vecwise_license ${third_party_libs})
target_link_libraries(milvus_server ${server_libs} license_check ${third_party_libs})
else ()
target_link_libraries(vecwise_server ${server_libs} ${third_party_libs})
target_link_libraries(milvus_server ${server_libs} ${third_party_libs})
endif()
if (ENABLE_LICENSE STREQUAL "ON")
add_executable(get_sys_info ${get_sys_info_files})
add_executable(license_generator ${license_generator_files})
target_link_libraries(get_sys_info ${license_libs} vecwise_license ${third_party_libs})
target_link_libraries(get_sys_info ${license_libs} license_check ${third_party_libs})
target_link_libraries(license_generator ${license_libs} ${third_party_libs})
install(TARGETS get_sys_info DESTINATION bin)
install(TARGETS license_generator DESTINATION bin)
endif ()
install(TARGETS vecwise_server DESTINATION bin)
install(TARGETS milvus_server DESTINATION bin)
add_subdirectory(sdk)
#target_link_libraries(
# libprometheus-cpp-push.a
# libprometheus-cpp-pull.a
# libprometheus-cpp-core.a
# pthread
# z
# ${CURL_LIBRARIES})

View File

@ -6,7 +6,6 @@
#include "DBImpl.h"
#include "DBMetaImpl.h"
#include "Env.h"
#include "Factories.h"
namespace zilliz {

View File

@ -38,6 +38,10 @@ public:
virtual Status Query(const std::string& table_id, uint64_t k, uint64_t nq,
const float* vectors, const meta::DatesT& dates, QueryResults& results) = 0;
virtual Status Query(const std::string& table_id, const std::vector<std::string>& file_ids,
uint64_t k, uint64_t nq, const float* vectors,
const meta::DatesT& dates, QueryResults& results) = 0;
virtual Status Size(uint64_t& result) = 0;
virtual Status DropAll() = 0;

View File

@ -5,11 +5,13 @@
******************************************************************************/
#include "DBImpl.h"
#include "DBMetaImpl.h"
#include "Env.h"
#include "Log.h"
#include "EngineFactory.h"
#include "metrics/Metrics.h"
#include "scheduler/SearchScheduler.h"
#include "scheduler/TaskScheduler.h"
#include "scheduler/context/SearchContext.h"
#include "scheduler/context/DeleteContext.h"
#include "utils/TimeRecorder.h"
#include <assert.h>
#include <chrono>
@ -25,6 +27,10 @@ namespace engine {
namespace {
static constexpr uint64_t METRIC_ACTION_INTERVAL = 1;
static constexpr uint64_t COMPACT_ACTION_INTERVAL = 1;
static constexpr uint64_t INDEX_ACTION_INTERVAL = 1;
void CollectInsertMetrics(double total_time, size_t n, bool succeed) {
double avg_time = total_time / n;
for (int i = 0; i < n; ++i) {
@ -71,73 +77,107 @@ void CollectFileMetrics(int file_type, size_t file_size, double total_time) {
}
}
// Converts the raw per-query search results in result_src into scores and
// stores them in result_target (which is cleared first).
//
// For the n-th entry of result_src the n-th vector of vectors_data
// (dimension floats each) is used to compute a normalizer: the sum of the
// squares of its components. Each (id, value) pair is then mapped to
// (id, (1 - value/normalizer) * 100).
//
// NOTE(review): vector_count is never read; vectors_data is indexed once per
// entry of result_src, so it is assumed to hold at least result_src.size()
// vectors -- confirm against callers.
void CalcScore(uint64_t vector_count,
const float *vectors_data,
uint64_t dimension,
const SearchContext::ResultSet &result_src,
SearchContext::ResultSet &result_target) {
result_target.clear();
if(result_src.empty()){
return;
}
server::TimeRecorder rc("Calculate Score");
int vec_index = 0;
for(auto& result : result_src) {
//normalizer: sum of squared components of the matching query vector
const float * vec_data = vectors_data + vec_index*dimension;
double vec_len = 0;
for(uint64_t i = 0; i < dimension; i++) {
vec_len += vec_data[i]*vec_data[i];
}
vec_index++;
//largest raw value in this result list
double max_score = 0.0;
for(auto& pair : result) {
if(max_score < pair.second) {
max_score = pair.second;
}
}
//raise the normalizer to the largest raw value so value/normalizer never
//exceeds 1, i.e. the computed score never drops below 0
if(max_score > vec_len) {
vec_len = max_score;
}
//avoid division by zero
static constexpr double TOLERANCE = std::numeric_limits<float>::epsilon();
if(vec_len < TOLERANCE) {
vec_len = TOLERANCE;
}
SearchContext::Id2ScoreMap score_array;
double vec_len_inverse = 1.0/vec_len;
for(auto& pair : result) {
score_array.push_back(std::make_pair(pair.first, (1 - pair.second*vec_len_inverse)*100.0));
}
result_target.emplace_back(score_array);
}
rc.Elapse("totally cost");
}
}
DBImpl::DBImpl(const Options& options)
: env_(options.env),
options_(options),
bg_compaction_scheduled_(false),
: options_(options),
shutting_down_(false),
bg_build_index_started_(false),
pMeta_(new meta::DBMetaImpl(options_.meta)),
pMemMgr_(new MemManager(pMeta_, options_)) {
StartTimerTasks(options_.memory_sync_interval);
meta_ptr_(new meta::DBMetaImpl(options_.meta)),
mem_mgr_(new MemManager(meta_ptr_, options_)),
compact_thread_pool_(1, 1),
index_thread_pool_(1, 1) {
StartTimerTasks();
}
Status DBImpl::CreateTable(meta::TableSchema& table_schema) {
return pMeta_->CreateTable(table_schema);
return meta_ptr_->CreateTable(table_schema);
}
Status DBImpl::DeleteTable(const std::string& table_id, const meta::DatesT& dates) {
meta::DatePartionedTableFilesSchema files;
auto status = pMeta_->FilesToDelete(table_id, dates, files);
if (!status.ok()) { return status; }
//dates partly delete files of the table but currently we don't support
for (auto &day_files : files) {
for (auto &file : day_files.second) {
boost::filesystem::remove(file.location_);
}
}
mem_mgr_->EraseMemVector(table_id); //not allow insert
meta_ptr_->DeleteTable(table_id); //soft delete table
//dates empty means delete all files of the table
if(dates.empty()) {
meta::TableSchema table_schema;
table_schema.table_id_ = table_id;
status = DescribeTable(table_schema);
pMeta_->DeleteTable(table_id);
boost::system::error_code ec;
boost::filesystem::remove_all(table_schema.location_, ec);
if(ec.failed()) {
ENGINE_LOG_WARNING << "Failed to remove table folder";
}
}
//scheduler will determine when to delete table files
TaskScheduler& scheduler = TaskScheduler::GetInstance();
DeleteContextPtr context = std::make_shared<DeleteContext>(table_id, meta_ptr_);
scheduler.Schedule(context);
return Status::OK();
}
Status DBImpl::DescribeTable(meta::TableSchema& table_schema) {
return pMeta_->DescribeTable(table_schema);
return meta_ptr_->DescribeTable(table_schema);
}
Status DBImpl::HasTable(const std::string& table_id, bool& has_or_not) {
return pMeta_->HasTable(table_id, has_or_not);
return meta_ptr_->HasTable(table_id, has_or_not);
}
Status DBImpl::AllTables(std::vector<meta::TableSchema>& table_schema_array) {
return pMeta_->AllTables(table_schema_array);
return meta_ptr_->AllTables(table_schema_array);
}
Status DBImpl::GetTableRowCount(const std::string& table_id, uint64_t& row_count) {
return pMeta_->Count(table_id, row_count);
return meta_ptr_->Count(table_id, row_count);
}
Status DBImpl::InsertVectors(const std::string& table_id_,
uint64_t n, const float* vectors, IDNumbers& vector_ids_) {
auto start_time = METRICS_NOW_TIME;
Status status = pMemMgr_->InsertVectors(table_id_, n, vectors, vector_ids_);
Status status = mem_mgr_->InsertVectors(table_id_, n, vectors, vector_ids_);
auto end_time = METRICS_NOW_TIME;
double total_time = METRICS_MICROSECONDS(start_time,end_time);
// std::chrono::microseconds time_span = std::chrono::duration_cast<std::chrono::microseconds>(end_time - start_time);
@ -166,14 +206,52 @@ Status DBImpl::Query(const std::string& table_id, uint64_t k, uint64_t nq,
#if 0
return QuerySync(table_id, k, nq, vectors, dates, results);
#else
return QueryAsync(table_id, k, nq, vectors, dates, results);
//get all table files from table
meta::DatePartionedTableFilesSchema files;
auto status = meta_ptr_->FilesToSearch(table_id, dates, files);
if (!status.ok()) { return status; }
meta::TableFilesSchema file_id_array;
for (auto &day_files : files) {
for (auto &file : day_files.second) {
file_id_array.push_back(file);
}
}
return QueryAsync(table_id, file_id_array, k, nq, vectors, dates, results);
#endif
}
/**
 * Searches only the specified files of a table.
 *
 * @param table_id  table to search
 * @param file_ids  decimal file ids, given as strings
 * @param k, nq, vectors, dates, results  forwarded unchanged to QueryAsync
 * @return the status of the file lookup if it fails, an error status when an
 *         id is not a valid number or no file matches, otherwise the status
 *         returned by QueryAsync
 */
Status DBImpl::Query(const std::string& table_id, const std::vector<std::string>& file_ids,
        uint64_t k, uint64_t nq, const float* vectors,
        const meta::DatesT& dates, QueryResults& results) {
    //convert the textual ids to numeric ids
    std::vector<size_t> ids;
    ids.reserve(file_ids.size());
    for (auto &id : file_ids) {
        try {
            std::string::size_type parsed_len = 0;
            size_t numeric_id = std::stoul(id, &parsed_len);
            //reject ids with trailing non-numeric characters, e.g. "12abc"
            if (parsed_len != id.size()) {
                return Status::Error("Invalid file id");
            }
            ids.push_back(numeric_id);
        } catch (const std::exception&) {
            //std::stoul throws on non-numeric or out-of-range input; report it
            //through the Status return value instead of letting it propagate
            return Status::Error("Invalid file id");
        }
    }

    //get specified files
    meta::TableFilesSchema files_array;
    auto status = meta_ptr_->GetTableFiles(table_id, ids, files_array);
    if (!status.ok()) {
        return status;
    }

    if(files_array.empty()) {
        return Status::Error("Invalid file id");
    }

    return QueryAsync(table_id, files_array, k, nq, vectors, dates, results);
}
Status DBImpl::QuerySync(const std::string& table_id, uint64_t k, uint64_t nq,
const float* vectors, const meta::DatesT& dates, QueryResults& results) {
meta::DatePartionedTableFilesSchema files;
auto status = pMeta_->FilesToSearch(table_id, dates, files);
auto status = meta_ptr_->FilesToSearch(table_id, dates, files);
if (!status.ok()) { return status; }
ENGINE_LOG_DEBUG << "Search DateT Size = " << files.size();
@ -301,94 +379,119 @@ Status DBImpl::QuerySync(const std::string& table_id, uint64_t k, uint64_t nq,
if (results.empty()) {
return Status::NotFound("Group " + table_id + ", search result not found!");
}
QueryResults temp_results;
CalcScore(nq, vectors, dim, results, temp_results);
results.swap(temp_results);
return Status::OK();
}
Status DBImpl::QueryAsync(const std::string& table_id, uint64_t k, uint64_t nq,
const float* vectors, const meta::DatesT& dates, QueryResults& results) {
Status DBImpl::QueryAsync(const std::string& table_id, const meta::TableFilesSchema& files,
uint64_t k, uint64_t nq, const float* vectors,
const meta::DatesT& dates, QueryResults& results) {
//step 1: get files to search
meta::DatePartionedTableFilesSchema files;
auto status = pMeta_->FilesToSearch(table_id, dates, files);
if (!status.ok()) { return status; }
ENGINE_LOG_DEBUG << "Search DateT Size=" << files.size();
SearchContextPtr context = std::make_shared<SearchContext>(k, nq, vectors);
for (auto &day_files : files) {
for (auto &file : day_files.second) {
TableFileSchemaPtr file_ptr = std::make_shared<meta::TableFileSchema>(file);
context->AddIndexFile(file_ptr);
}
for (auto &file : files) {
TableFileSchemaPtr file_ptr = std::make_shared<meta::TableFileSchema>(file);
context->AddIndexFile(file_ptr);
}
//step 2: put search task to scheduler
SearchScheduler& scheduler = SearchScheduler::GetInstance();
scheduler.ScheduleSearchTask(context);
TaskScheduler& scheduler = TaskScheduler::GetInstance();
scheduler.Schedule(context);
context->WaitResult();
//step 3: construct results
//step 3: construct results, calculate score between 0 ~ 100
auto& context_result = context->GetResult();
results.swap(context_result);
meta::TableSchema table_schema;
table_schema.table_id_ = table_id;
meta_ptr_->DescribeTable(table_schema);
CalcScore(context->nq(), context->vectors(), table_schema.dimension_, context_result, results);
return Status::OK();
}
void DBImpl::StartTimerTasks(int interval) {
bg_timer_thread_ = std::thread(&DBImpl::BackgroundTimerTask, this, interval);
void DBImpl::StartTimerTasks() {
bg_timer_thread_ = std::thread(&DBImpl::BackgroundTimerTask, this);
}
void DBImpl::BackgroundTimerTask(int interval) {
void DBImpl::BackgroundTimerTask() {
Status status;
server::SystemInfo::GetInstance().Init();
while (true) {
if (!bg_error_.ok()) break;
if (shutting_down_.load(std::memory_order_acquire)) break;
if (shutting_down_.load(std::memory_order_acquire)){
for(auto& iter : compact_thread_results_) {
iter.wait();
}
for(auto& iter : index_thread_results_) {
iter.wait();
}
break;
}
std::this_thread::sleep_for(std::chrono::seconds(interval));
std::this_thread::sleep_for(std::chrono::seconds(1));
server::Metrics::GetInstance().KeepingAliveCounterIncrement(interval);
int64_t cache_usage = cache::CpuCacheMgr::GetInstance()->CacheUsage();
int64_t cache_total = cache::CpuCacheMgr::GetInstance()->CacheCapacity();
server::Metrics::GetInstance().CacheUsageGaugeSet(cache_usage*100/cache_total);
uint64_t size;
Size(size);
server::Metrics::GetInstance().DataFileSizeGaugeSet(size);
server::Metrics::GetInstance().CPUUsagePercentSet();
server::Metrics::GetInstance().RAMUsagePercentSet();
server::Metrics::GetInstance().GPUPercentGaugeSet();
server::Metrics::GetInstance().GPUMemoryUsageGaugeSet();
server::Metrics::GetInstance().OctetsSet();
TrySchedule();
StartMetricTask();
StartCompactionTask();
StartBuildIndexTask();
}
}
void DBImpl::TrySchedule() {
if (bg_compaction_scheduled_) return;
if (!bg_error_.ok()) return;
void DBImpl::StartMetricTask() {
static uint64_t metric_clock_tick = 0;
metric_clock_tick++;
if(metric_clock_tick%METRIC_ACTION_INTERVAL != 0) {
return;
}
bg_compaction_scheduled_ = true;
env_->Schedule(&DBImpl::BGWork, this);
server::Metrics::GetInstance().KeepingAliveCounterIncrement(METRIC_ACTION_INTERVAL);
int64_t cache_usage = cache::CpuCacheMgr::GetInstance()->CacheUsage();
int64_t cache_total = cache::CpuCacheMgr::GetInstance()->CacheCapacity();
server::Metrics::GetInstance().CacheUsageGaugeSet(cache_usage*100/cache_total);
uint64_t size;
Size(size);
server::Metrics::GetInstance().DataFileSizeGaugeSet(size);
server::Metrics::GetInstance().CPUUsagePercentSet();
server::Metrics::GetInstance().RAMUsagePercentSet();
server::Metrics::GetInstance().GPUPercentGaugeSet();
server::Metrics::GetInstance().GPUMemoryUsageGaugeSet();
server::Metrics::GetInstance().OctetsSet();
}
void DBImpl::BGWork(void* db_) {
reinterpret_cast<DBImpl*>(db_)->BackgroundCall();
}
void DBImpl::StartCompactionTask() {
static uint64_t compact_clock_tick = 0;
compact_clock_tick++;
if(compact_clock_tick%COMPACT_ACTION_INTERVAL != 0) {
return;
}
void DBImpl::BackgroundCall() {
std::lock_guard<std::mutex> lock(mutex_);
assert(bg_compaction_scheduled_);
//serialize memory data
std::vector<std::string> temp_table_ids;
mem_mgr_->Serialize(temp_table_ids);
for(auto& id : temp_table_ids) {
compact_table_ids_.insert(id);
}
if (!bg_error_.ok() || shutting_down_.load(std::memory_order_acquire))
return ;
//compactiong has been finished?
if(!compact_thread_results_.empty()) {
std::chrono::milliseconds span(10);
if (compact_thread_results_.back().wait_for(span) == std::future_status::ready) {
compact_thread_results_.pop_back();
}
}
BackgroundCompaction();
bg_compaction_scheduled_ = false;
bg_work_finish_signal_.notify_all();
//add new compaction task
if(compact_thread_results_.empty()) {
compact_thread_results_.push_back(
compact_thread_pool_.enqueue(&DBImpl::BackgroundCompaction, this, compact_table_ids_));
compact_table_ids_.clear();
}
}
Status DBImpl::MergeFiles(const std::string& table_id, const meta::DateT& date,
@ -396,10 +499,10 @@ Status DBImpl::MergeFiles(const std::string& table_id, const meta::DateT& date,
meta::TableFileSchema table_file;
table_file.table_id_ = table_id;
table_file.date_ = date;
Status status = pMeta_->CreateTableFile(table_file);
Status status = meta_ptr_->CreateTableFile(table_file);
if (!status.ok()) {
LOG(INFO) << status.ToString() << std::endl;
ENGINE_LOG_INFO << status.ToString() << std::endl;
return status;
}
@ -420,7 +523,7 @@ Status DBImpl::MergeFiles(const std::string& table_id, const meta::DateT& date,
file_schema.file_type_ = meta::TableFileSchema::TO_DELETE;
updated.push_back(file_schema);
LOG(DEBUG) << "Merging file " << file_schema.file_id_;
ENGINE_LOG_DEBUG << "Merging file " << file_schema.file_id_;
index_size = index->Size();
if (index_size >= options_.index_trigger_size) break;
@ -436,8 +539,8 @@ Status DBImpl::MergeFiles(const std::string& table_id, const meta::DateT& date,
}
table_file.size_ = index_size;
updated.push_back(table_file);
status = pMeta_->UpdateTableFiles(updated);
LOG(DEBUG) << "New merged file " << table_file.file_id_ <<
status = meta_ptr_->UpdateTableFiles(updated);
ENGINE_LOG_DEBUG << "New merged file " << table_file.file_id_ <<
" of size=" << index->PhysicalSize()/(1024*1024) << " M";
index->Cache();
@ -447,13 +550,12 @@ Status DBImpl::MergeFiles(const std::string& table_id, const meta::DateT& date,
Status DBImpl::BackgroundMergeFiles(const std::string& table_id) {
meta::DatePartionedTableFilesSchema raw_files;
auto status = pMeta_->FilesToMerge(table_id, raw_files);
auto status = meta_ptr_->FilesToMerge(table_id, raw_files);
if (!status.ok()) {
return status;
}
bool has_merge = false;
for (auto& kv : raw_files) {
auto files = kv.second;
if (files.size() <= options_.merge_trigger_number) {
@ -461,87 +563,16 @@ Status DBImpl::BackgroundMergeFiles(const std::string& table_id) {
}
has_merge = true;
MergeFiles(table_id, kv.first, kv.second);
}
pMeta_->Archive();
TryBuildIndex();
pMeta_->CleanUpFilesWithTTL(1);
return Status::OK();
}
Status DBImpl::BuildIndex(const meta::TableFileSchema& file) {
meta::TableFileSchema table_file;
table_file.table_id_ = file.table_id_;
table_file.date_ = file.date_;
Status status = pMeta_->CreateTableFile(table_file);
if (!status.ok()) {
return status;
}
ExecutionEnginePtr to_index = EngineFactory::Build(file.dimension_, file.location_, (EngineType)file.engine_type_);
to_index->Load();
auto start_time = METRICS_NOW_TIME;
auto index = to_index->BuildIndex(table_file.location_);
auto end_time = METRICS_NOW_TIME;
auto total_time = METRICS_MICROSECONDS(start_time, end_time);
server::Metrics::GetInstance().BuildIndexDurationSecondsHistogramObserve(total_time);
table_file.file_type_ = meta::TableFileSchema::INDEX;
table_file.size_ = index->Size();
auto to_remove = file;
to_remove.file_type_ = meta::TableFileSchema::TO_DELETE;
meta::TableFilesSchema update_files = {to_remove, table_file};
pMeta_->UpdateTableFiles(update_files);
LOG(DEBUG) << "New index file " << table_file.file_id_ << " of size "
<< index->PhysicalSize()/(1024*1024) << " M"
<< " from file " << to_remove.file_id_;
index->Cache();
pMeta_->Archive();
return Status::OK();
}
void DBImpl::BackgroundBuildIndex() {
std::lock_guard<std::mutex> lock(build_index_mutex_);
assert(bg_build_index_started_);
meta::TableFilesSchema to_index_files;
pMeta_->FilesToIndex(to_index_files);
Status status;
for (auto& file : to_index_files) {
/* LOG(DEBUG) << "Buiding index for " << file.location; */
status = BuildIndex(file);
if (!status.ok()) {
bg_error_ = status;
return;
if (shutting_down_.load(std::memory_order_acquire)){
break;
}
}
/* LOG(DEBUG) << "All Buiding index Done"; */
bg_build_index_started_ = false;
bg_build_index_finish_signal_.notify_all();
}
Status DBImpl::TryBuildIndex() {
if (bg_build_index_started_) return Status::OK();
if (shutting_down_.load(std::memory_order_acquire)) return Status::OK();
bg_build_index_started_ = true;
std::thread build_index_task(&DBImpl::BackgroundBuildIndex, this);
build_index_task.detach();
return Status::OK();
}
void DBImpl::BackgroundCompaction() {
std::vector<std::string> table_ids;
pMemMgr_->Serialize(table_ids);
void DBImpl::BackgroundCompaction(std::set<std::string> table_ids) {
Status status;
for (auto table_id : table_ids) {
status = BackgroundMergeFiles(table_id);
@ -550,34 +581,125 @@ void DBImpl::BackgroundCompaction() {
return;
}
}
meta_ptr_->Archive();
meta_ptr_->CleanUpFilesWithTTL(1);
}
// Schedules the background index build, keeping at most one build job outstanding.
// Only every INDEX_ACTION_INTERVAL-th call does any work; the other calls return
// immediately.
void DBImpl::StartBuildIndexTask() {
    static uint64_t tick_count = 0;
    if (++tick_count % INDEX_ACTION_INTERVAL != 0) {
        return;
    }

    auto& pending = index_thread_results_;

    // Reap the outstanding job if it completes within a short (10 ms) wait.
    if (!pending.empty()
        && pending.back().wait_for(std::chrono::milliseconds(10)) == std::future_status::ready) {
        pending.pop_back();
    }

    // Queue a new build only when no job is outstanding.
    if (pending.empty()) {
        pending.push_back(index_thread_pool_.enqueue(&DBImpl::BackgroundBuildIndex, this));
    }
}
// Builds an index file from the table file `file` and records the result in meta.
//
// The source data is loaded, a new table file is registered in meta, the index is
// built at that file's location and serialized, and finally one meta update marks
// the source file TO_DELETE and the new file INDEX.
//
// Returns Status::OK() on success, and also when the table is found to be gone after
// the build (its files are then marked for deletion and nothing is saved).
// Returns an error Status when the engine type is invalid, when creating the table
// file or updating meta fails, or when an exception is thrown along the way.
Status DBImpl::BuildIndex(const meta::TableFileSchema& file) {
    ExecutionEnginePtr to_index = EngineFactory::Build(file.dimension_, file.location_, (EngineType)file.engine_type_);
    if(to_index == nullptr) {
        return Status::Error("Invalid engine type");
    }

    try {
        //step 1: load index
        to_index->Load();

        //step 2: create table file
        meta::TableFileSchema table_file;
        table_file.table_id_ = file.table_id_;
        table_file.date_ = file.date_;
        Status status = meta_ptr_->CreateTableFile(table_file);
        if (!status.ok()) {
            return status;
        }

        //step 3: build index
        auto start_time = METRICS_NOW_TIME;
        auto index = to_index->BuildIndex(table_file.location_);
        auto end_time = METRICS_NOW_TIME;
        auto total_time = METRICS_MICROSECONDS(start_time, end_time);
        server::Metrics::GetInstance().BuildIndexDurationSecondsHistogramObserve(total_time);

        //step 4: if table has been deleted, don't save index file
        bool has_table = false;
        meta_ptr_->HasTable(file.table_id_, has_table);
        if(!has_table) {
            meta_ptr_->DeleteTableFiles(file.table_id_);
            return Status::OK();
        }

        //step 5: save index file
        index->Serialize();

        //step 6: update meta
        table_file.file_type_ = meta::TableFileSchema::INDEX;
        table_file.size_ = index->Size();

        auto to_remove = file;
        to_remove.file_type_ = meta::TableFileSchema::TO_DELETE;

        meta::TableFilesSchema update_files = {to_remove, table_file};
        status = meta_ptr_->UpdateTableFiles(update_files);
        if (!status.ok()) {
            // Meta does not know about the new index file: do not announce or cache it,
            // and let the caller see the failure instead of a false success.
            return status;
        }

        ENGINE_LOG_DEBUG << "New index file " << table_file.file_id_ << " of size "
                         << index->PhysicalSize()/(1024*1024) << " M"
                         << " from file " << to_remove.file_id_;

        index->Cache();

    } catch (const std::exception& ex) {
        return Status::Error("Build index encounter exception", ex.what());
    }

    return Status::OK();
}
void DBImpl::BackgroundBuildIndex() {
meta::TableFilesSchema to_index_files;
meta_ptr_->FilesToIndex(to_index_files);
Status status;
for (auto& file : to_index_files) {
/* ENGINE_LOG_DEBUG << "Buiding index for " << file.location; */
status = BuildIndex(file);
if (!status.ok()) {
bg_error_ = status;
return;
}
if (shutting_down_.load(std::memory_order_acquire)){
break;
}
}
/* ENGINE_LOG_DEBUG << "All Buiding index Done"; */
}
Status DBImpl::DropAll() {
return pMeta_->DropAll();
return meta_ptr_->DropAll();
}
Status DBImpl::Size(uint64_t& result) {
return pMeta_->Size(result);
return meta_ptr_->Size(result);
}
DBImpl::~DBImpl() {
{
std::unique_lock<std::mutex> lock(mutex_);
shutting_down_.store(true, std::memory_order_release);
while (bg_compaction_scheduled_) {
bg_work_finish_signal_.wait(lock);
}
}
{
std::unique_lock<std::mutex> lock(build_index_mutex_);
while (bg_build_index_started_) {
bg_build_index_finish_signal_.wait(lock);
}
}
shutting_down_.store(true, std::memory_order_release);
bg_timer_thread_.join();
std::vector<std::string> ids;
pMemMgr_->Serialize(ids);
env_->Stop();
mem_mgr_->Serialize(ids);
}
} // namespace engine

View File

@ -8,12 +8,15 @@
#include "DB.h"
#include "MemManager.h"
#include "Types.h"
#include "utils/ThreadPool.h"
#include <mutex>
#include <condition_variable>
#include <memory>
#include <atomic>
#include <thread>
#include <list>
#include <set>
namespace zilliz {
namespace milvus {
@ -48,6 +51,10 @@ public:
virtual Status Query(const std::string& table_id, uint64_t k, uint64_t nq,
const float* vectors, const meta::DatesT& dates, QueryResults& results) override;
virtual Status Query(const std::string& table_id, const std::vector<std::string>& file_ids,
uint64_t k, uint64_t nq, const float* vectors,
const meta::DatesT& dates, QueryResults& results) override;
virtual Status DropAll() override;
virtual Status Size(uint64_t& result) override;
@ -58,43 +65,43 @@ private:
Status QuerySync(const std::string& table_id, uint64_t k, uint64_t nq,
const float* vectors, const meta::DatesT& dates, QueryResults& results);
Status QueryAsync(const std::string& table_id, uint64_t k, uint64_t nq,
const float* vectors, const meta::DatesT& dates, QueryResults& results);
Status QueryAsync(const std::string& table_id, const meta::TableFilesSchema& files,
uint64_t k, uint64_t nq, const float* vectors,
const meta::DatesT& dates, QueryResults& results);
void StartTimerTasks();
void BackgroundTimerTask();
void StartMetricTask();
void StartCompactionTask();
Status MergeFiles(const std::string& table_id,
const meta::DateT& date,
const meta::TableFilesSchema& files);
Status BackgroundMergeFiles(const std::string& table_id);
void BackgroundCompaction(std::set<std::string> table_ids);
void StartBuildIndexTask();
void BackgroundBuildIndex();
Status BuildIndex(const meta::TableFileSchema&);
Status TryBuildIndex();
Status MergeFiles(const std::string& table_id,
const meta::DateT& date,
const meta::TableFilesSchema& files);
Status BackgroundMergeFiles(const std::string& table_id);
void TrySchedule();
void StartTimerTasks(int interval);
void BackgroundTimerTask(int interval);
static void BGWork(void* db);
void BackgroundCall();
void BackgroundCompaction();
Env* const env_;
const Options options_;
std::mutex mutex_;
std::condition_variable bg_work_finish_signal_;
bool bg_compaction_scheduled_;
Status bg_error_;
std::atomic<bool> shutting_down_;
std::mutex build_index_mutex_;
bool bg_build_index_started_;
std::condition_variable bg_build_index_finish_signal_;
std::thread bg_timer_thread_;
MetaPtr pMeta_;
MemManagerPtr pMemMgr_;
MetaPtr meta_ptr_;
MemManagerPtr mem_mgr_;
server::ThreadPool compact_thread_pool_;
std::list<std::future<void>> compact_thread_results_;
std::set<std::string> compact_table_ids_;
server::ThreadPool index_thread_pool_;
std::list<std::future<void>> index_thread_results_;
}; // DBImpl

View File

@ -29,24 +29,43 @@ using namespace sqlite_orm;
namespace {
void HandleException(std::exception &e) {
ENGINE_LOG_DEBUG << "Engine meta exception: " << e.what();
throw e;
Status HandleException(const std::string& desc, std::exception &e) {
ENGINE_LOG_ERROR << desc << ": " << e.what();
return Status::DBTransactionError(desc, e.what());
}
// Scoped instrumentation for a single meta access.
// Construction increments the meta-access counter and records the start time.
// Destruction reports the elapsed time (as computed by METRICS_MICROSECONDS) to the
// meta-access duration histogram.
class MetricCollector {
public:
    MetricCollector() {
        server::Metrics::GetInstance().MetaAccessTotalIncrement();
        begin_ = METRICS_NOW_TIME;
    }

    ~MetricCollector() {
        auto finish = METRICS_NOW_TIME;
        auto elapsed = METRICS_MICROSECONDS(begin_, finish);
        server::Metrics::GetInstance().MetaAccessDurationSecondsHistogramObserve(elapsed);
    }

private:
    using TimePoint = std::chrono::system_clock::time_point;
    TimePoint begin_;
};
}
inline auto StoragePrototype(const std::string &path) {
return make_storage(path,
make_table("Table",
make_table("Tables",
make_column("id", &TableSchema::id_, primary_key()),
make_column("table_id", &TableSchema::table_id_, unique()),
make_column("state", &TableSchema::state_),
make_column("dimension", &TableSchema::dimension_),
make_column("created_on", &TableSchema::created_on_),
make_column("files_cnt", &TableSchema::files_cnt_, default_value(0)),
make_column("engine_type", &TableSchema::engine_type_),
make_column("store_raw_data", &TableSchema::store_raw_data_)),
make_table("TableFile",
make_table("TableFiles",
make_column("id", &TableFileSchema::id_, primary_key()),
make_column("table_id", &TableFileSchema::table_id_),
make_column("engine_type", &TableFileSchema::engine_type_),
@ -109,9 +128,9 @@ Status DBMetaImpl::Initialize() {
if (!boost::filesystem::is_directory(options_.path)) {
auto ret = boost::filesystem::create_directory(options_.path);
if (!ret) {
ENGINE_LOG_ERROR << "Create directory " << options_.path << " Error";
ENGINE_LOG_ERROR << "Failed to create db directory " << options_.path;
return Status::DBTransactionError("Failed to create db directory", options_.path);
}
assert(ret);
}
ConnectorPtr = std::make_unique<ConnectorT>(StoragePrototype(options_.path + "/meta.sqlite"));
@ -139,15 +158,15 @@ Status DBMetaImpl::DropPartitionsByDates(const std::string &table_id,
return status;
}
auto yesterday = GetDateWithDelta(-1);
for (auto &date : dates) {
if (date >= yesterday) {
return Status::Error("Could not delete partitions with 2 days");
}
}
try {
auto yesterday = GetDateWithDelta(-1);
for (auto &date : dates) {
if (date >= yesterday) {
return Status::Error("Could not delete partitions with 2 days");
}
}
ConnectorPtr->update_all(
set(
c(&TableFileSchema::file_type_) = (int) TableFileSchema::TO_DELETE
@ -157,40 +176,51 @@ Status DBMetaImpl::DropPartitionsByDates(const std::string &table_id,
in(&TableFileSchema::date_, dates)
));
} catch (std::exception &e) {
HandleException(e);
return HandleException("Encounter exception when drop partition", e);
}
return Status::OK();
}
Status DBMetaImpl::CreateTable(TableSchema &table_schema) {
server::Metrics::GetInstance().MetaAccessTotalIncrement();
if (table_schema.table_id_ == "") {
NextTableId(table_schema.table_id_);
}
table_schema.files_cnt_ = 0;
table_schema.id_ = -1;
table_schema.created_on_ = utils::GetMicroSecTimeStamp();
auto start_time = METRICS_NOW_TIME;
{
try {
MetricCollector metric;
if (table_schema.table_id_ == "") {
NextTableId(table_schema.table_id_);
} else {
auto table = ConnectorPtr->select(columns(&TableSchema::state_),
where(c(&TableSchema::table_id_) == table_schema.table_id_));
if (table.size() == 1) {
std::string msg = (TableSchema::TO_DELETE == std::get<0>(table[0])) ?
"Table already exists and it is in delete state, please wait a second" : "Table already exists";
return Status::Error(msg);
}
}
table_schema.files_cnt_ = 0;
table_schema.id_ = -1;
table_schema.created_on_ = utils::GetMicroSecTimeStamp();
try {
auto id = ConnectorPtr->insert(table_schema);
table_schema.id_ = id;
} catch (...) {
return Status::DBTransactionError("Add Table Error");
}
}
auto end_time = METRICS_NOW_TIME;
auto total_time = METRICS_MICROSECONDS(start_time, end_time);
server::Metrics::GetInstance().MetaAccessDurationSecondsHistogramObserve(total_time);
auto table_path = GetTablePath(table_schema.table_id_);
table_schema.location_ = table_path;
if (!boost::filesystem::is_directory(table_path)) {
auto ret = boost::filesystem::create_directories(table_path);
if (!ret) {
ENGINE_LOG_ERROR << "Create directory " << table_path << " Error";
auto table_path = GetTablePath(table_schema.table_id_);
table_schema.location_ = table_path;
if (!boost::filesystem::is_directory(table_path)) {
auto ret = boost::filesystem::create_directories(table_path);
if (!ret) {
ENGINE_LOG_ERROR << "Create directory " << table_path << " Error";
return Status::Error("Failed to create table path");
}
}
assert(ret);
} catch (std::exception &e) {
return HandleException("Encounter exception when create table", e);
}
return Status::OK();
@ -198,14 +228,53 @@ Status DBMetaImpl::CreateTable(TableSchema &table_schema) {
Status DBMetaImpl::DeleteTable(const std::string& table_id) {
try {
//drop the table from meta
auto tables = ConnectorPtr->select(columns(&TableSchema::id_),
MetricCollector metric;
//soft delete table
auto tables = ConnectorPtr->select(columns(&TableSchema::id_,
&TableSchema::files_cnt_,
&TableSchema::dimension_,
&TableSchema::engine_type_,
&TableSchema::store_raw_data_,
&TableSchema::created_on_),
where(c(&TableSchema::table_id_) == table_id));
for (auto &table : tables) {
ConnectorPtr->remove<TableSchema>(std::get<0>(table));
TableSchema table_schema;
table_schema.table_id_ = table_id;
table_schema.state_ = (int)TableSchema::TO_DELETE;
table_schema.id_ = std::get<0>(table);
table_schema.files_cnt_ = std::get<1>(table);
table_schema.dimension_ = std::get<2>(table);
table_schema.engine_type_ = std::get<3>(table);
table_schema.store_raw_data_ = std::get<4>(table);
table_schema.created_on_ = std::get<5>(table);
ConnectorPtr->update<TableSchema>(table_schema);
}
} catch (std::exception &e) {
HandleException(e);
return HandleException("Encounter exception when delete table", e);
}
return Status::OK();
}
Status DBMetaImpl::DeleteTableFiles(const std::string& table_id) {
try {
MetricCollector metric;
//soft delete table files
ConnectorPtr->update_all(
set(
c(&TableFileSchema::file_type_) = (int) TableFileSchema::TO_DELETE,
c(&TableFileSchema::updated_time_) = utils::GetMicroSecTimeStamp()
),
where(
c(&TableFileSchema::table_id_) == table_id and
c(&TableFileSchema::file_type_) != (int) TableFileSchema::TO_DELETE
));
} catch (std::exception &e) {
return HandleException("Encounter exception when delete table files", e);
}
return Status::OK();
@ -213,19 +282,17 @@ Status DBMetaImpl::DeleteTable(const std::string& table_id) {
Status DBMetaImpl::DescribeTable(TableSchema &table_schema) {
try {
server::Metrics::GetInstance().MetaAccessTotalIncrement();
auto start_time = METRICS_NOW_TIME;
MetricCollector metric;
auto groups = ConnectorPtr->select(columns(&TableSchema::id_,
&TableSchema::table_id_,
&TableSchema::files_cnt_,
&TableSchema::dimension_,
&TableSchema::engine_type_,
&TableSchema::store_raw_data_),
where(c(&TableSchema::table_id_) == table_schema.table_id_));
auto end_time = METRICS_NOW_TIME;
auto total_time = METRICS_MICROSECONDS(start_time, end_time);
server::Metrics::GetInstance().MetaAccessDurationSecondsHistogramObserve(total_time);
assert(groups.size() <= 1);
where(c(&TableSchema::table_id_) == table_schema.table_id_
and c(&TableSchema::state_) != (int)TableSchema::TO_DELETE));
if (groups.size() == 1) {
table_schema.id_ = std::get<0>(groups[0]);
table_schema.files_cnt_ = std::get<2>(groups[0]);
@ -240,47 +307,44 @@ Status DBMetaImpl::DescribeTable(TableSchema &table_schema) {
table_schema.location_ = table_path;
} catch (std::exception &e) {
HandleException(e);
return HandleException("Encounter exception when describe table", e);
}
return Status::OK();
}
// Reports whether a table with the given id exists and is not pending deletion.
//
// has_or_not is set to true only when exactly one row matches table_id with a state
// other than TO_DELETE; otherwise it is false (including when the lookup fails).
//
// Returns Status::OK() when the lookup ran, or the error Status produced by
// HandleException when the query throws. The failure must be returned rather than
// dropped: callers that see OK with has_or_not == false treat the table as deleted.
Status DBMetaImpl::HasTable(const std::string &table_id, bool &has_or_not) {
    has_or_not = false;

    try {
        MetricCollector metric;
        auto tables = ConnectorPtr->select(columns(&TableSchema::id_),
                                           where(c(&TableSchema::table_id_) == table_id
                                           and c(&TableSchema::state_) != (int)TableSchema::TO_DELETE));
        if (tables.size() == 1) {
            has_or_not = true;
        } else {
            has_or_not = false;
        }

    } catch (std::exception &e) {
        return HandleException("Encounter exception when lookup table", e);
    }

    return Status::OK();
}
Status DBMetaImpl::AllTables(std::vector<TableSchema>& table_schema_array) {
try {
server::Metrics::GetInstance().MetaAccessTotalIncrement();
auto start_time = METRICS_NOW_TIME;
MetricCollector metric;
auto selected = ConnectorPtr->select(columns(&TableSchema::id_,
&TableSchema::table_id_,
&TableSchema::files_cnt_,
&TableSchema::dimension_,
&TableSchema::engine_type_,
&TableSchema::store_raw_data_));
auto end_time = METRICS_NOW_TIME;
auto total_time = METRICS_MICROSECONDS(start_time, end_time);
server::Metrics::GetInstance().MetaAccessDurationSecondsHistogramObserve(total_time);
&TableSchema::store_raw_data_),
where(c(&TableSchema::state_) != (int)TableSchema::TO_DELETE));
for (auto &table : selected) {
TableSchema schema;
schema.id_ = std::get<0>(table);
@ -292,8 +356,9 @@ Status DBMetaImpl::AllTables(std::vector<TableSchema>& table_schema_array) {
table_schema_array.emplace_back(schema);
}
} catch (std::exception &e) {
HandleException(e);
HandleException("Encounter exception when lookup all tables", e);
}
return Status::OK();
@ -310,37 +375,33 @@ Status DBMetaImpl::CreateTableFile(TableFileSchema &file_schema) {
return status;
}
NextFileId(file_schema.file_id_);
file_schema.file_type_ = TableFileSchema::NEW;
file_schema.dimension_ = table_schema.dimension_;
file_schema.size_ = 0;
file_schema.created_on_ = utils::GetMicroSecTimeStamp();
file_schema.updated_time_ = file_schema.created_on_;
file_schema.engine_type_ = table_schema.engine_type_;
GetTableFilePath(file_schema);
try {
MetricCollector metric;
{
try {
server::Metrics::GetInstance().MetaAccessTotalIncrement();
auto start_time = METRICS_NOW_TIME;
auto id = ConnectorPtr->insert(file_schema);
file_schema.id_ = id;
auto end_time = METRICS_NOW_TIME;
auto total_time = METRICS_MICROSECONDS(start_time, end_time);
server::Metrics::GetInstance().MetaAccessDurationSecondsHistogramObserve(total_time);
} catch (...) {
return Status::DBTransactionError("Add file Error");
NextFileId(file_schema.file_id_);
file_schema.file_type_ = TableFileSchema::NEW;
file_schema.dimension_ = table_schema.dimension_;
file_schema.size_ = 0;
file_schema.created_on_ = utils::GetMicroSecTimeStamp();
file_schema.updated_time_ = file_schema.created_on_;
file_schema.engine_type_ = table_schema.engine_type_;
GetTableFilePath(file_schema);
auto id = ConnectorPtr->insert(file_schema);
file_schema.id_ = id;
auto partition_path = GetTableDatePartitionPath(file_schema.table_id_, file_schema.date_);
if (!boost::filesystem::is_directory(partition_path)) {
auto ret = boost::filesystem::create_directory(partition_path);
if (!ret) {
ENGINE_LOG_ERROR << "Create directory " << partition_path << " Error";
return Status::DBTransactionError("Failed to create partition directory");
}
}
}
auto partition_path = GetTableDatePartitionPath(file_schema.table_id_, file_schema.date_);
if (!boost::filesystem::is_directory(partition_path)) {
auto ret = boost::filesystem::create_directory(partition_path);
if (!ret) {
ENGINE_LOG_ERROR << "Create directory " << partition_path << " Error";
}
assert(ret);
} catch (std::exception& ex) {
return HandleException("Encounter exception when create table file", ex);
}
return Status::OK();
@ -350,8 +411,8 @@ Status DBMetaImpl::FilesToIndex(TableFilesSchema &files) {
files.clear();
try {
server::Metrics::GetInstance().MetaAccessTotalIncrement();
auto start_time = METRICS_NOW_TIME;
MetricCollector metric;
auto selected = ConnectorPtr->select(columns(&TableFileSchema::id_,
&TableFileSchema::table_id_,
&TableFileSchema::file_id_,
@ -361,9 +422,6 @@ Status DBMetaImpl::FilesToIndex(TableFilesSchema &files) {
&TableFileSchema::engine_type_),
where(c(&TableFileSchema::file_type_)
== (int) TableFileSchema::TO_INDEX));
auto end_time = METRICS_NOW_TIME;
auto total_time = METRICS_MICROSECONDS(start_time, end_time);
server::Metrics::GetInstance().MetaAccessDurationSecondsHistogramObserve(total_time);
std::map<std::string, TableSchema> groups;
TableFileSchema table_file;
@ -391,8 +449,9 @@ Status DBMetaImpl::FilesToIndex(TableFilesSchema &files) {
table_file.dimension_ = groups[table_file.table_id_].dimension_;
files.push_back(table_file);
}
} catch (std::exception &e) {
HandleException(e);
return HandleException("Encounter exception when iterate raw files", e);
}
return Status::OK();
@ -404,8 +463,8 @@ Status DBMetaImpl::FilesToSearch(const std::string &table_id,
files.clear();
try {
server::Metrics::GetInstance().MetaAccessTotalIncrement();
auto start_time = METRICS_NOW_TIME;
MetricCollector metric;
if (partition.empty()) {
auto selected = ConnectorPtr->select(columns(&TableFileSchema::id_,
&TableFileSchema::table_id_,
@ -420,9 +479,7 @@ Status DBMetaImpl::FilesToSearch(const std::string &table_id,
== (int) TableFileSchema::TO_INDEX or
c(&TableFileSchema::file_type_)
== (int) TableFileSchema::INDEX)));
auto end_time = METRICS_NOW_TIME;
auto total_time = METRICS_MICROSECONDS(start_time, end_time);
server::Metrics::GetInstance().MetaAccessDurationSecondsHistogramObserve(total_time);
TableSchema table_schema;
table_schema.table_id_ = table_id;
auto status = DescribeTable(table_schema);
@ -455,7 +512,8 @@ Status DBMetaImpl::FilesToSearch(const std::string &table_id,
&TableFileSchema::file_id_,
&TableFileSchema::file_type_,
&TableFileSchema::size_,
&TableFileSchema::date_),
&TableFileSchema::date_,
&TableFileSchema::engine_type_),
where(c(&TableFileSchema::table_id_) == table_id and
in(&TableFileSchema::date_, partition) and
(c(&TableFileSchema::file_type_) == (int) TableFileSchema::RAW or
@ -463,9 +521,7 @@ Status DBMetaImpl::FilesToSearch(const std::string &table_id,
== (int) TableFileSchema::TO_INDEX or
c(&TableFileSchema::file_type_)
== (int) TableFileSchema::INDEX)));
auto end_time = METRICS_NOW_TIME;
auto total_time = METRICS_MICROSECONDS(start_time, end_time);
server::Metrics::GetInstance().MetaAccessDurationSecondsHistogramObserve(total_time);
TableSchema table_schema;
table_schema.table_id_ = table_id;
auto status = DescribeTable(table_schema);
@ -482,6 +538,7 @@ Status DBMetaImpl::FilesToSearch(const std::string &table_id,
table_file.file_type_ = std::get<3>(file);
table_file.size_ = std::get<4>(file);
table_file.date_ = std::get<5>(file);
table_file.engine_type_ = std::get<6>(file);
table_file.dimension_ = table_schema.dimension_;
GetTableFilePath(table_file);
auto dateItr = files.find(table_file.date_);
@ -493,7 +550,7 @@ Status DBMetaImpl::FilesToSearch(const std::string &table_id,
}
} catch (std::exception &e) {
HandleException(e);
return HandleException("Encounter exception when iterate index files", e);
}
return Status::OK();
@ -504,8 +561,8 @@ Status DBMetaImpl::FilesToMerge(const std::string &table_id,
files.clear();
try {
server::Metrics::GetInstance().MetaAccessTotalIncrement();
auto start_time = METRICS_NOW_TIME;
MetricCollector metric;
auto selected = ConnectorPtr->select(columns(&TableFileSchema::id_,
&TableFileSchema::table_id_,
&TableFileSchema::file_id_,
@ -513,10 +570,9 @@ Status DBMetaImpl::FilesToMerge(const std::string &table_id,
&TableFileSchema::size_,
&TableFileSchema::date_),
where(c(&TableFileSchema::file_type_) == (int) TableFileSchema::RAW and
c(&TableFileSchema::table_id_) == table_id));
auto end_time = METRICS_NOW_TIME;
auto total_time = METRICS_MICROSECONDS(start_time, end_time);
server::Metrics::GetInstance().MetaAccessDurationSecondsHistogramObserve(total_time);
c(&TableFileSchema::table_id_) == table_id),
order_by(&TableFileSchema::size_).desc());
TableSchema table_schema;
table_schema.table_id_ = table_id;
auto status = DescribeTable(table_schema);
@ -542,110 +598,48 @@ Status DBMetaImpl::FilesToMerge(const std::string &table_id,
files[table_file.date_].push_back(table_file);
}
} catch (std::exception &e) {
HandleException(e);
return HandleException("Encounter exception when iterate merge files", e);
}
return Status::OK();
}
Status DBMetaImpl::FilesToDelete(const std::string& table_id,
const DatesT& partition,
DatePartionedTableFilesSchema& files) {
auto now = utils::GetMicroSecTimeStamp();
Status DBMetaImpl::GetTableFiles(const std::string& table_id,
const std::vector<size_t>& ids,
TableFilesSchema& table_files) {
try {
if(partition.empty()) {
//step 1: get table files by dates
auto selected = ConnectorPtr->select(columns(&TableFileSchema::id_,
&TableFileSchema::table_id_,
&TableFileSchema::file_id_,
&TableFileSchema::size_,
&TableFileSchema::date_),
where(c(&TableFileSchema::file_type_) !=
(int) TableFileSchema::TO_DELETE
and c(&TableFileSchema::table_id_) == table_id));
//step 2: erase table files from meta
for (auto &file : selected) {
TableFileSchema table_file;
table_file.id_ = std::get<0>(file);
table_file.table_id_ = std::get<1>(file);
table_file.file_id_ = std::get<2>(file);
table_file.size_ = std::get<3>(file);
table_file.date_ = std::get<4>(file);
GetTableFilePath(table_file);
auto dateItr = files.find(table_file.date_);
if (dateItr == files.end()) {
files[table_file.date_] = TableFilesSchema();
}
files[table_file.date_].push_back(table_file);
ConnectorPtr->remove<TableFileSchema>(std::get<0>(file));
}
} else {
//step 1: get all table files
auto selected = ConnectorPtr->select(columns(&TableFileSchema::id_,
&TableFileSchema::table_id_,
&TableFileSchema::file_id_,
&TableFileSchema::size_,
&TableFileSchema::date_),
where(c(&TableFileSchema::file_type_) !=
(int) TableFileSchema::TO_DELETE
and in(&TableFileSchema::date_, partition)
and c(&TableFileSchema::table_id_) == table_id));
//step 2: erase table files from meta
for (auto &file : selected) {
TableFileSchema table_file;
table_file.id_ = std::get<0>(file);
table_file.table_id_ = std::get<1>(file);
table_file.file_id_ = std::get<2>(file);
table_file.size_ = std::get<3>(file);
table_file.date_ = std::get<4>(file);
GetTableFilePath(table_file);
auto dateItr = files.find(table_file.date_);
if (dateItr == files.end()) {
files[table_file.date_] = TableFilesSchema();
}
files[table_file.date_].push_back(table_file);
ConnectorPtr->remove<TableFileSchema>(std::get<0>(file));
}
}
} catch (std::exception &e) {
HandleException(e);
}
return Status::OK();
}
Status DBMetaImpl::GetTableFile(TableFileSchema &file_schema) {
try {
auto files = ConnectorPtr->select(columns(&TableFileSchema::id_,
&TableFileSchema::table_id_,
&TableFileSchema::file_id_,
table_files.clear();
auto files = ConnectorPtr->select(columns(&TableFileSchema::file_id_,
&TableFileSchema::file_type_,
&TableFileSchema::size_,
&TableFileSchema::date_),
where(c(&TableFileSchema::file_id_) == file_schema.file_id_ and
c(&TableFileSchema::table_id_) == file_schema.table_id_
&TableFileSchema::date_,
&TableFileSchema::engine_type_),
where(c(&TableFileSchema::table_id_) == table_id and
in(&TableFileSchema::id_, ids)
));
assert(files.size() <= 1);
if (files.size() == 1) {
file_schema.id_ = std::get<0>(files[0]);
file_schema.table_id_ = std::get<1>(files[0]);
file_schema.file_id_ = std::get<2>(files[0]);
file_schema.file_type_ = std::get<3>(files[0]);
file_schema.size_ = std::get<4>(files[0]);
file_schema.date_ = std::get<5>(files[0]);
} else {
return Status::NotFound("Table:" + file_schema.table_id_ +
" File:" + file_schema.file_id_ + " not found");
TableSchema table_schema;
table_schema.table_id_ = table_id;
auto status = DescribeTable(table_schema);
if (!status.ok()) {
return status;
}
for (auto &file : files) {
TableFileSchema file_schema;
file_schema.table_id_ = table_id;
file_schema.file_id_ = std::get<0>(file);
file_schema.file_type_ = std::get<1>(file);
file_schema.size_ = std::get<2>(file);
file_schema.date_ = std::get<3>(file);
file_schema.engine_type_ = std::get<4>(file);
file_schema.dimension_ = table_schema.dimension_;
GetTableFilePath(file_schema);
table_files.emplace_back(file_schema);
}
} catch (std::exception &e) {
HandleException(e);
return HandleException("Encounter exception when lookup table files", e);
}
return Status::OK();
@ -674,7 +668,7 @@ Status DBMetaImpl::Archive() {
c(&TableFileSchema::file_type_) != (int) TableFileSchema::TO_DELETE
));
} catch (std::exception &e) {
HandleException(e);
return HandleException("Encounter exception when update table files", e);
}
}
if (criteria == "disk") {
@ -704,52 +698,65 @@ Status DBMetaImpl::Size(uint64_t &result) {
result += (uint64_t) (*std::get<0>(sub_query));
}
} catch (std::exception &e) {
HandleException(e);
return HandleException("Encounter exception when calculte db size", e);
}
return Status::OK();
}
Status DBMetaImpl::DiscardFiles(long to_discard_size) {
LOG(DEBUG) << "About to discard size=" << to_discard_size;
if (to_discard_size <= 0) {
return Status::OK();
}
ENGINE_LOG_DEBUG << "About to discard size=" << to_discard_size;
try {
auto selected = ConnectorPtr->select(columns(&TableFileSchema::id_,
&TableFileSchema::size_),
where(c(&TableFileSchema::file_type_)
MetricCollector metric;
auto commited = ConnectorPtr->transaction([&]() mutable {
auto selected = ConnectorPtr->select(columns(&TableFileSchema::id_,
&TableFileSchema::size_),
where(c(&TableFileSchema::file_type_)
!= (int) TableFileSchema::TO_DELETE),
order_by(&TableFileSchema::id_),
limit(10));
order_by(&TableFileSchema::id_),
limit(10));
std::vector<int> ids;
TableFileSchema table_file;
std::vector<int> ids;
TableFileSchema table_file;
for (auto &file : selected) {
if (to_discard_size <= 0) break;
table_file.id_ = std::get<0>(file);
table_file.size_ = std::get<1>(file);
ids.push_back(table_file.id_);
ENGINE_LOG_DEBUG << "Discard table_file.id=" << table_file.file_id_
<< " table_file.size=" << table_file.size_;
to_discard_size -= table_file.size_;
for (auto &file : selected) {
if (to_discard_size <= 0) break;
table_file.id_ = std::get<0>(file);
table_file.size_ = std::get<1>(file);
ids.push_back(table_file.id_);
ENGINE_LOG_DEBUG << "Discard table_file.id=" << table_file.file_id_
<< " table_file.size=" << table_file.size_;
to_discard_size -= table_file.size_;
}
if (ids.size() == 0) {
return true;
}
ConnectorPtr->update_all(
set(
c(&TableFileSchema::file_type_) = (int) TableFileSchema::TO_DELETE,
c(&TableFileSchema::updated_time_) = utils::GetMicroSecTimeStamp()
),
where(
in(&TableFileSchema::id_, ids)
));
return true;
});
if (!commited) {
return Status::DBTransactionError("Update table file error");
}
if (ids.size() == 0) {
return Status::OK();
}
ConnectorPtr->update_all(
set(
c(&TableFileSchema::file_type_) = (int) TableFileSchema::TO_DELETE
),
where(
in(&TableFileSchema::id_, ids)
));
} catch (std::exception &e) {
HandleException(e);
return HandleException("Encounter exception when discard table file", e);
}
return DiscardFiles(to_discard_size);
@ -758,38 +765,64 @@ Status DBMetaImpl::DiscardFiles(long to_discard_size) {
Status DBMetaImpl::UpdateTableFile(TableFileSchema &file_schema) {
file_schema.updated_time_ = utils::GetMicroSecTimeStamp();
try {
server::Metrics::GetInstance().MetaAccessTotalIncrement();
auto start_time = METRICS_NOW_TIME;
MetricCollector metric;
auto tables = ConnectorPtr->select(columns(&TableSchema::state_),
where(c(&TableSchema::table_id_) == file_schema.table_id_));
//if the table has been deleted, just mark the table file as TO_DELETE
//clean thread will delete the file later
if(tables.size() < 1 || std::get<0>(tables[0]) == (int)TableSchema::TO_DELETE) {
file_schema.file_type_ = TableFileSchema::TO_DELETE;
}
ConnectorPtr->update(file_schema);
auto end_time = METRICS_NOW_TIME;
auto total_time = METRICS_MICROSECONDS(start_time, end_time);
server::Metrics::GetInstance().MetaAccessDurationSecondsHistogramObserve(total_time);
} catch (std::exception &e) {
ENGINE_LOG_DEBUG << "table_id= " << file_schema.table_id_ << " file_id=" << file_schema.file_id_;
HandleException(e);
std::string msg = "Exception update table file: table_id = " + file_schema.table_id_
+ " file_id = " + file_schema.file_id_;
return HandleException(msg, e);
}
return Status::OK();
}
Status DBMetaImpl::UpdateTableFiles(TableFilesSchema &files) {
try {
server::Metrics::GetInstance().MetaAccessTotalIncrement();
auto start_time = METRICS_NOW_TIME;
MetricCollector metric;
std::map<std::string, bool> has_tables;
for (auto &file : files) {
if(has_tables.find(file.table_id_) != has_tables.end()) {
continue;
}
auto tables = ConnectorPtr->select(columns(&TableSchema::id_),
where(c(&TableSchema::table_id_) == file.table_id_
and c(&TableSchema::state_) != (int) TableSchema::TO_DELETE));
if(tables.size() >= 1) {
has_tables[file.table_id_] = true;
} else {
has_tables[file.table_id_] = false;
}
}
auto commited = ConnectorPtr->transaction([&]() mutable {
for (auto &file : files) {
if(!has_tables[file.table_id_]) {
file.file_type_ = TableFileSchema::TO_DELETE;
}
file.updated_time_ = utils::GetMicroSecTimeStamp();
ConnectorPtr->update(file);
}
auto end_time = METRICS_NOW_TIME;
auto total_time = METRICS_MICROSECONDS(start_time, end_time);
server::Metrics::GetInstance().MetaAccessDurationSecondsHistogramObserve(total_time);
return true;
});
if (!commited) {
return Status::DBTransactionError("Update files Error");
return Status::DBTransactionError("Update table files error");
}
} catch (std::exception &e) {
HandleException(e);
return HandleException("Encounter exception when update table files", e);
}
return Status::OK();
}
@ -797,37 +830,69 @@ Status DBMetaImpl::UpdateTableFiles(TableFilesSchema &files) {
Status DBMetaImpl::CleanUpFilesWithTTL(uint16_t seconds) {
auto now = utils::GetMicroSecTimeStamp();
try {
auto selected = ConnectorPtr->select(columns(&TableFileSchema::id_,
&TableFileSchema::table_id_,
&TableFileSchema::file_id_,
&TableFileSchema::file_type_,
&TableFileSchema::size_,
&TableFileSchema::date_),
where(
c(&TableFileSchema::file_type_) == (int) TableFileSchema::TO_DELETE
and
c(&TableFileSchema::updated_time_)
> now - seconds * US_PS));
MetricCollector metric;
TableFilesSchema updated;
TableFileSchema table_file;
auto files = ConnectorPtr->select(columns(&TableFileSchema::id_,
&TableFileSchema::table_id_,
&TableFileSchema::file_id_,
&TableFileSchema::date_),
where(
c(&TableFileSchema::file_type_) ==
(int) TableFileSchema::TO_DELETE
and
c(&TableFileSchema::updated_time_)
< now - seconds * US_PS));
for (auto &file : selected) {
table_file.id_ = std::get<0>(file);
table_file.table_id_ = std::get<1>(file);
table_file.file_id_ = std::get<2>(file);
table_file.file_type_ = std::get<3>(file);
table_file.size_ = std::get<4>(file);
table_file.date_ = std::get<5>(file);
GetTableFilePath(table_file);
if (table_file.file_type_ == TableFileSchema::TO_DELETE) {
auto commited = ConnectorPtr->transaction([&]() mutable {
TableFileSchema table_file;
for (auto &file : files) {
table_file.id_ = std::get<0>(file);
table_file.table_id_ = std::get<1>(file);
table_file.file_id_ = std::get<2>(file);
table_file.date_ = std::get<3>(file);
GetTableFilePath(table_file);
ENGINE_LOG_DEBUG << "Removing deleted id =" << table_file.id_ << " location = " << table_file.location_ << std::endl;
boost::filesystem::remove(table_file.location_);
ConnectorPtr->remove<TableFileSchema>(table_file.id_);
}
ConnectorPtr->remove<TableFileSchema>(table_file.id_);
/* LOG(DEBUG) << "Removing deleted id=" << table_file.id << " location=" << table_file.location << std::endl; */
return true;
});
if (!commited) {
return Status::DBTransactionError("Clean files error");
}
} catch (std::exception &e) {
HandleException(e);
return HandleException("Encounter exception when clean table files", e);
}
try {
MetricCollector metric;
auto tables = ConnectorPtr->select(columns(&TableSchema::id_,
&TableSchema::table_id_),
where(c(&TableSchema::state_) == (int) TableSchema::TO_DELETE));
auto commited = ConnectorPtr->transaction([&]() mutable {
for (auto &table : tables) {
auto table_path = GetTablePath(std::get<1>(table));
ENGINE_LOG_DEBUG << "Remove table folder: " << table_path;
boost::filesystem::remove_all(table_path);
ConnectorPtr->remove<TableSchema>(std::get<0>(table));
}
return true;
});
if (!commited) {
return Status::DBTransactionError("Clean files error");
}
} catch (std::exception &e) {
return HandleException("Encounter exception when clean table files", e);
}
return Status::OK();
@ -835,37 +900,23 @@ Status DBMetaImpl::CleanUpFilesWithTTL(uint16_t seconds) {
Status DBMetaImpl::CleanUp() {
try {
auto selected = ConnectorPtr->select(columns(&TableFileSchema::id_,
&TableFileSchema::table_id_,
&TableFileSchema::file_id_,
&TableFileSchema::file_type_,
&TableFileSchema::size_,
&TableFileSchema::date_),
where(
c(&TableFileSchema::file_type_) == (int) TableFileSchema::TO_DELETE
or
c(&TableFileSchema::file_type_)
== (int) TableFileSchema::NEW));
auto files = ConnectorPtr->select(columns(&TableFileSchema::id_),
where(c(&TableFileSchema::file_type_) == (int) TableFileSchema::NEW));
TableFilesSchema updated;
TableFileSchema table_file;
for (auto &file : selected) {
table_file.id_ = std::get<0>(file);
table_file.table_id_ = std::get<1>(file);
table_file.file_id_ = std::get<2>(file);
table_file.file_type_ = std::get<3>(file);
table_file.size_ = std::get<4>(file);
table_file.date_ = std::get<5>(file);
GetTableFilePath(table_file);
if (table_file.file_type_ == TableFileSchema::TO_DELETE) {
boost::filesystem::remove(table_file.location_);
auto commited = ConnectorPtr->transaction([&]() mutable {
for (auto &file : files) {
ENGINE_LOG_DEBUG << "Remove table file type as NEW";
ConnectorPtr->remove<TableFileSchema>(std::get<0>(file));
}
ConnectorPtr->remove<TableFileSchema>(table_file.id_);
/* LOG(DEBUG) << "Removing id=" << table_file.id << " location=" << table_file.location << std::endl; */
return true;
});
if (!commited) {
return Status::DBTransactionError("Clean files error");
}
} catch (std::exception &e) {
HandleException(e);
return HandleException("Encounter exception when clean table file", e);
}
return Status::OK();
@ -874,20 +925,15 @@ Status DBMetaImpl::CleanUp() {
Status DBMetaImpl::Count(const std::string &table_id, uint64_t &result) {
try {
MetricCollector metric;
auto selected = ConnectorPtr->select(columns(&TableFileSchema::size_),
where((c(&TableFileSchema::file_type_) == (int) TableFileSchema::RAW
or
c(&TableFileSchema::file_type_) == (int) TableFileSchema::TO_INDEX
or c(&TableFileSchema::file_type_) == (int) TableFileSchema::INDEX)
and c(&TableFileSchema::table_id_) == table_id));
server::Metrics::GetInstance().MetaAccessTotalIncrement();
auto start_time = METRICS_NOW_TIME;
auto selected = ConnectorPtr->select(columns(&TableFileSchema::size_,
&TableFileSchema::date_),
where((c(&TableFileSchema::file_type_) == (int) TableFileSchema::RAW or
c(&TableFileSchema::file_type_) == (int) TableFileSchema::TO_INDEX
or
c(&TableFileSchema::file_type_) == (int) TableFileSchema::INDEX)
and
c(&TableFileSchema::table_id_) == table_id));
auto end_time = METRICS_NOW_TIME;
auto total_time = METRICS_MICROSECONDS(start_time, end_time);
server::Metrics::GetInstance().MetaAccessDurationSecondsHistogramObserve(total_time);
TableSchema table_schema;
table_schema.table_id_ = table_id;
auto status = DescribeTable(table_schema);
@ -905,7 +951,7 @@ Status DBMetaImpl::Count(const std::string &table_id, uint64_t &result) {
result /= sizeof(float);
} catch (std::exception &e) {
HandleException(e);
return HandleException("Encounter exception when calculate table file size", e);
}
return Status::OK();
}

View File

@ -20,16 +20,20 @@ public:
DBMetaImpl(const DBMetaOptions& options_);
virtual Status CreateTable(TableSchema& table_schema) override;
virtual Status DeleteTable(const std::string& table_id) override;
virtual Status DescribeTable(TableSchema& group_info_) override;
virtual Status HasTable(const std::string& table_id, bool& has_or_not) override;
virtual Status AllTables(std::vector<TableSchema>& table_schema_array) override;
virtual Status DeleteTable(const std::string& table_id) override;
virtual Status DeleteTableFiles(const std::string& table_id) override;
virtual Status CreateTableFile(TableFileSchema& file_schema) override;
virtual Status DropPartitionsByDates(const std::string& table_id,
const DatesT& dates) override;
virtual Status GetTableFile(TableFileSchema& file_schema) override;
virtual Status GetTableFiles(const std::string& table_id,
const std::vector<size_t>& ids,
TableFilesSchema& table_files) override;
virtual Status UpdateTableFile(TableFileSchema& file_schema) override;
@ -42,10 +46,6 @@ public:
virtual Status FilesToMerge(const std::string& table_id,
DatePartionedTableFilesSchema& files) override;
virtual Status FilesToDelete(const std::string& table_id,
const DatesT& partition,
DatePartionedTableFilesSchema& files) override;
virtual Status FilesToIndex(TableFilesSchema&) override;
virtual Status Archive() override;

View File

@ -1,87 +0,0 @@
/*******************************************************************************
* Copyright (Zilliz) - All Rights Reserved
* Unauthorized copying of this file, via any medium is strictly prohibited.
* Proprietary and confidential.
******************************************************************************/
#include <easylogging++.h>
#include <assert.h>
#include <atomic>
#include "Env.h"
namespace zilliz {
namespace milvus {
namespace engine {
Env::Env()
: bg_work_started_(false),
shutting_down_(false) {
}
void Env::Schedule(void (*function)(void* arg), void* arg) {
std::unique_lock<std::mutex> lock(bg_work_mutex_);
if (shutting_down_) return;
if (!bg_work_started_) {
bg_work_started_ = true;
std::thread bg_thread(Env::BackgroundThreadEntryPoint, this);
bg_thread.detach();
}
if (bg_work_queue_.empty()) {
bg_work_cv_.notify_one();
}
bg_work_queue_.emplace(function, arg);
}
void Env::BackgroundThreadMain() {
while (!shutting_down_) {
std::unique_lock<std::mutex> lock(bg_work_mutex_);
while (bg_work_queue_.empty() && !shutting_down_) {
bg_work_cv_.wait(lock);
}
if (shutting_down_) break;
assert(!bg_work_queue_.empty());
auto bg_function = bg_work_queue_.front().function_;
void* bg_arg = bg_work_queue_.front().arg_;
bg_work_queue_.pop();
lock.unlock();
bg_function(bg_arg);
}
std::unique_lock<std::mutex> lock(bg_work_mutex_);
bg_work_started_ = false;
bg_work_cv_.notify_all();
}
void Env::Stop() {
{
std::unique_lock<std::mutex> lock(bg_work_mutex_);
if (shutting_down_ || !bg_work_started_) return;
}
shutting_down_ = true;
{
std::unique_lock<std::mutex> lock(bg_work_mutex_);
if (bg_work_queue_.empty()) {
bg_work_cv_.notify_one();
}
while (bg_work_started_) {
bg_work_cv_.wait(lock);
}
}
shutting_down_ = false;
}
// Nothing to release explicitly; members clean up after themselves.
Env::~Env() = default;
// Returns the process-wide Env, constructed on first use as a function-local
// static.
Env* Env::Default() {
    static Env shared_instance;
    Env* const handle = &shared_instance;
    return handle;
}
} // namespace engine
} // namespace milvus
} // namespace zilliz

View File

@ -1,56 +0,0 @@
/*******************************************************************************
* Copyright (Zilliz) - All Rights Reserved
* Unauthorized copying of this file, via any medium is strictly prohibited.
* Proprietary and confidential.
******************************************************************************/
#pragma once
#include <condition_variable>
#include <thread>
#include <mutex>
#include <queue>
#include <atomic>
namespace zilliz {
namespace milvus {
namespace engine {
// Minimal background-work environment: callers hand over plain function
// pointers via Schedule(), and BackgroundThreadMain() pops and runs them from
// bg_work_queue_.
// Copying is disabled.
class Env {
public:
Env();
Env(const Env&) = delete;
Env& operator=(const Env&) = delete;
// Enqueues function(arg) to be run by the background thread.
void Schedule(void (*function)(void* arg), void* arg);
// Virtual so that subclasses may customise shutdown.
virtual void Stop();
virtual ~Env();
// Accessor for a shared Env instance.
static Env* Default();
protected:
// Loop executed by the background thread.
void BackgroundThreadMain();
// Static trampoline suitable as a std::thread entry point; forwards to the
// member function on `env`.
static void BackgroundThreadEntryPoint(Env* env) {
env->BackgroundThreadMain();
}
// One queued unit of work: a function pointer plus its opaque argument.
// Both members are const, so an item is immutable once constructed.
struct BGWork {
explicit BGWork(void (*function)(void*), void* arg)
: function_(function), arg_(arg) {}
void (* const function_)(void*);
void* const arg_;
};
// NOTE(review): presumably guards bg_work_queue_ and bg_work_started_ -
// confirm against Env.cpp.
std::mutex bg_work_mutex_;
std::condition_variable bg_work_cv_;
// Pending work items in FIFO order (std::queue).
std::queue<BGWork> bg_work_queue_;
// Plain bool, unlike shutting_down_ below.
bool bg_work_started_;
// Atomic flag; the name suggests it is set while a stop is in progress.
std::atomic<bool> shutting_down_;
}; // Env
} // namespace engine
} // namespace milvus
} // namespace zilliz

View File

@ -4,8 +4,8 @@
* Proprietary and confidential.
******************************************************************************/
#include "FaissExecutionEngine.h"
#include "Log.h"
#include <easylogging++.h>
#include <faiss/AutoTune.h>
#include <faiss/MetaIndexes.h>
#include <faiss/IndexFlat.h>
@ -74,7 +74,7 @@ Status FaissExecutionEngine::Load() {
if (!index) {
index = read_index(location_);
to_cache = true;
LOG(DEBUG) << "Disk io from: " << location_;
ENGINE_LOG_DEBUG << "Disk io from: " << location_;
}
pIndex_ = index->data();
@ -98,6 +98,8 @@ Status FaissExecutionEngine::Merge(const std::string& location) {
if (location == location_) {
return Status::Error("Cannot Merge Self");
}
ENGINE_LOG_DEBUG << "Merge index file: " << location << " to: " << location_;
auto to_merge = zilliz::milvus::cache::CpuCacheMgr::GetInstance()->GetIndex(location);
if (!to_merge) {
to_merge = read_index(location);
@ -110,6 +112,8 @@ Status FaissExecutionEngine::Merge(const std::string& location) {
ExecutionEnginePtr
FaissExecutionEngine::BuildIndex(const std::string& location) {
ENGINE_LOG_DEBUG << "Build index file: " << location << " from: " << location_;
auto opd = std::make_shared<Operand>();
opd->d = pIndex_->d;
opd->index_type = build_index_type_;
@ -122,7 +126,6 @@ FaissExecutionEngine::BuildIndex(const std::string& location) {
from_index->id_map.data());
ExecutionEnginePtr new_ee(new FaissExecutionEngine(index->data(), location, build_index_type_, raw_index_type_));
new_ee->Serialize();
return new_ee;
}

View File

@ -62,7 +62,7 @@ Status MemVectors::Serialize(std::string& table_id) {
auto status = pMeta_->UpdateTableFile(schema_);
LOG(DEBUG) << "New " << ((schema_.file_type_ == meta::TableFileSchema::RAW) ? "raw" : "to_index")
<< " file " << schema_.file_id_ << " of size " << pEE_->Size() / meta::M << " M";
<< " file " << schema_.file_id_ << " of size " << (double)(pEE_->Size()) / (double)meta::M << " M";
pEE_->Cache();
@ -142,6 +142,13 @@ Status MemManager::Serialize(std::vector<std::string>& table_ids) {
return Status::OK();
}
// Drops the entry for `table_id` from memMap_ while holding mutex_.
// Erasing a table that has no entry is a no-op; the call always reports OK.
Status MemManager::EraseMemVector(const std::string& table_id) {
    {
        std::lock_guard<std::mutex> guard(mutex_);
        memMap_.erase(table_id);
    }
    return Status::OK();
}
} // namespace engine
} // namespace milvus

View File

@ -75,6 +75,8 @@ public:
Status Serialize(std::vector<std::string>& table_ids);
Status EraseMemVector(const std::string& table_id);
private:
Status InsertVectorsNoLock(const std::string& table_id,
size_t n, const float* vectors, IDNumbers& vector_ids);

View File

@ -24,31 +24,32 @@ public:
using Ptr = std::shared_ptr<Meta>;
virtual Status CreateTable(TableSchema& table_schema) = 0;
virtual Status DeleteTable(const std::string& table_id) = 0;
virtual Status DescribeTable(TableSchema& table_schema) = 0;
virtual Status HasTable(const std::string& table_id, bool& has_or_not) = 0;
virtual Status AllTables(std::vector<TableSchema>& table_schema_array) = 0;
virtual Status DeleteTable(const std::string& table_id) = 0;
virtual Status DeleteTableFiles(const std::string& table_id) = 0;
virtual Status CreateTableFile(TableFileSchema& file_schema) = 0;
virtual Status DropPartitionsByDates(const std::string& table_id,
const DatesT& dates) = 0;
virtual Status GetTableFile(TableFileSchema& file_schema) = 0;
virtual Status GetTableFiles(const std::string& table_id,
const std::vector<size_t>& ids,
TableFilesSchema& table_files) = 0;
virtual Status UpdateTableFile(TableFileSchema& file_schema) = 0;
virtual Status UpdateTableFiles(TableFilesSchema& files) = 0;
virtual Status FilesToSearch(const std::string& table_id,
const DatesT& partition,
DatePartionedTableFilesSchema& files) = 0;
virtual Status FilesToSearch(const std::string &table_id,
const DatesT &partition,
DatePartionedTableFilesSchema& files) = 0;
virtual Status FilesToMerge(const std::string& table_id,
DatePartionedTableFilesSchema& files) = 0;
virtual Status FilesToDelete(const std::string& table_id,
const DatesT& partition,
DatePartionedTableFilesSchema& files) = 0;
virtual Status Size(uint64_t& result) = 0;
virtual Status Archive() = 0;

View File

@ -21,12 +21,18 @@ const DateT EmptyDate = -1;
typedef std::vector<DateT> DatesT;
struct TableSchema {
size_t id_;
typedef enum {
NORMAL,
TO_DELETE,
} TABLE_STATE;
size_t id_ = 0;
std::string table_id_;
int state_ = (int)NORMAL;
size_t files_cnt_ = 0;
uint16_t dimension_;
uint16_t dimension_ = 0;
std::string location_;
long created_on_;
long created_on_ = 0;
int engine_type_ = (int)EngineType::FAISS_IDMAP;
bool store_raw_data_ = false;
}; // TableSchema
@ -40,17 +46,17 @@ struct TableFileSchema {
TO_DELETE,
} FILE_TYPE;
size_t id_;
size_t id_ = 0;
std::string table_id_;
int engine_type_ = (int)EngineType::FAISS_IDMAP;
std::string file_id_;
int file_type_ = NEW;
size_t size_;
size_t size_ = 0;
DateT date_ = EmptyDate;
uint16_t dimension_;
uint16_t dimension_ = 0;
std::string location_;
long updated_time_;
long created_on_;
long updated_time_ = 0;
long created_on_ = 0;
}; // TableFileSchema
typedef std::vector<TableFileSchema> TableFilesSchema;

View File

@ -9,7 +9,6 @@
#include <boost/algorithm/string.hpp>
#include "Options.h"
#include "Env.h"
#include "DBMetaImpl.h"
#include "Exception.h"
@ -17,8 +16,7 @@ namespace zilliz {
namespace milvus {
namespace engine {
Options::Options()
: env(Env::Default()) {
Options::Options() {
}
ArchiveConf::ArchiveConf(const std::string& type, const std::string& criterias) {

View File

@ -15,6 +15,10 @@ namespace engine {
class Env;
static constexpr uint64_t ONE_KB = 1024;
static constexpr uint64_t ONE_MB = ONE_KB*ONE_KB;
static constexpr uint64_t ONE_GB = ONE_KB*ONE_MB;
struct ArchiveConf {
using CriteriaT = std::map<std::string, int>;
@ -40,10 +44,9 @@ struct DBMetaOptions {
struct Options {
Options();
uint16_t memory_sync_interval = 1;
uint16_t memory_sync_interval = 1; //unit: second
uint16_t merge_trigger_number = 2;
size_t index_trigger_size = 1024*1024*1024;
Env* env;
size_t index_trigger_size = ONE_GB; //unit: byte
DBMetaOptions meta;
}; // Options

View File

@ -7,39 +7,44 @@
#include <string>
namespace zilliz {
namespace milvus {
namespace engine {
class Status {
public:
public:
Status() noexcept : state_(nullptr) {}
~Status() { delete[] state_; }
Status(const Status& rhs);
Status& operator=(const Status& rhs);
Status(const Status &rhs);
Status &operator=(const Status &rhs);
Status(Status&& rhs) noexcept : state_(rhs.state_) { rhs.state_ = nullptr; }
Status& operator=(Status&& rhs_) noexcept;
Status(Status &&rhs) noexcept : state_(rhs.state_) { rhs.state_ = nullptr; }
Status &operator=(Status &&rhs_) noexcept;
static Status OK() { return Status(); }
static Status NotFound(const std::string& msg, const std::string& msg2="") {
static Status NotFound(const std::string &msg, const std::string &msg2 = "") {
return Status(kNotFound, msg, msg2);
}
static Status Error(const std::string& msg, const std::string& msg2="") {
static Status Error(const std::string &msg, const std::string &msg2 = "") {
return Status(kError, msg, msg2);
}
static Status InvalidDBPath(const std::string& msg, const std::string& msg2="") {
static Status InvalidDBPath(const std::string &msg, const std::string &msg2 = "") {
return Status(kInvalidDBPath, msg, msg2);
}
static Status GroupError(const std::string& msg, const std::string& msg2="") {
static Status GroupError(const std::string &msg, const std::string &msg2 = "") {
return Status(kGroupError, msg, msg2);
}
static Status DBTransactionError(const std::string& msg, const std::string& msg2="") {
static Status DBTransactionError(const std::string &msg, const std::string &msg2 = "") {
return Status(kDBTransactionError, msg, msg2);
}
static Status AlreadyExist(const std::string &msg, const std::string &msg2 = "") {
return Status(kAlreadyExist, msg, msg2);
}
bool ok() const { return state_ == nullptr; }
bool IsNotFound() const { return code() == kNotFound; }
@ -48,11 +53,12 @@ public:
bool IsInvalidDBPath() const { return code() == kInvalidDBPath; }
bool IsGroupError() const { return code() == kGroupError; }
bool IsDBTransactionError() const { return code() == kDBTransactionError; }
bool IsAlreadyExist() const { return code() == kAlreadyExist; }
std::string ToString() const;
private:
const char* state_;
private:
const char *state_ = nullptr;
enum Code {
kOK = 0,
@ -62,21 +68,23 @@ private:
kInvalidDBPath,
kGroupError,
kDBTransactionError,
kAlreadyExist,
};
Code code() const {
return (state_ == nullptr) ? kOK : static_cast<Code>(state_[4]);
}
Status(Code code, const std::string& msg, const std::string& msg2);
static const char* CopyState(const char* s);
Status(Code code, const std::string &msg, const std::string &msg2);
static const char *CopyState(const char *s);
}; // Status
inline Status::Status(const Status& rhs) {
inline Status::Status(const Status &rhs) {
state_ = (rhs.state_ == nullptr) ? nullptr : CopyState(rhs.state_);
}
inline Status& Status::operator=(const Status& rhs) {
inline Status &Status::operator=(const Status &rhs) {
if (state_ != rhs.state_) {
delete[] state_;
state_ = (rhs.state_ == nullptr) ? nullptr : CopyState(rhs.state_);
@ -84,7 +92,7 @@ inline Status& Status::operator=(const Status& rhs) {
return *this;
}
inline Status& Status::operator=(Status&& rhs) noexcept {
inline Status &Status::operator=(Status &&rhs) noexcept {
std::swap(state_, rhs.state_);
return *this;
}

View File

@ -1,64 +0,0 @@
/*******************************************************************************
* Copyright (Zilliz) - All Rights Reserved
* Unauthorized copying of this file, via any medium is strictly prohibited.
* Proprietary and confidential.
******************************************************************************/
#include "ScheduleStrategy.h"
#include "cache/CpuCacheMgr.h"
#include "utils/Error.h"
#include "utils/Log.h"
namespace zilliz {
namespace milvus {
namespace engine {
class MemScheduleStrategy : public IScheduleStrategy {
public:
bool Schedule(const SearchContextPtr &search_context, IndexLoaderQueue::LoaderQueue& loader_list) override {
if(search_context == nullptr) {
return false;
}
SearchContext::Id2IndexMap index_files = search_context->GetIndexMap();
//some index loader alread exists
for(auto& loader : loader_list) {
if(index_files.find(loader->file_->id_) != index_files.end()){
SERVER_LOG_INFO << "Append SearchContext to exist IndexLoaderContext";
index_files.erase(loader->file_->id_);
loader->search_contexts_.push_back(search_context);
}
}
//index_files still contains some index files, create new loader
for(auto& pair : index_files) {
SERVER_LOG_INFO << "Create new IndexLoaderContext for: " << pair.second->location_;
IndexLoaderContextPtr new_loader = std::make_shared<IndexLoaderContext>();
new_loader->search_contexts_.push_back(search_context);
new_loader->file_ = pair.second;
auto index = zilliz::milvus::cache::CpuCacheMgr::GetInstance()->GetIndex(pair.second->location_);
if(index != nullptr) {
//if the index file has been in memory, increase its priority
loader_list.push_front(new_loader);
} else {
//index file not in memory, put it to tail
loader_list.push_back(new_loader);
}
}
return true;
}
};
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Factory for the memory-aware strategy defined in this translation unit.
ScheduleStrategyPtr StrategyFactory::CreateMemStrategy() {
    return ScheduleStrategyPtr(new MemScheduleStrategy());
}
}
}
}

View File

@ -1,180 +0,0 @@
/*******************************************************************************
* Copyright (Zilliz) - All Rights Reserved
* Unauthorized copying of this file, via any medium is strictly prohibited.
* Proprietary and confidential.
******************************************************************************/
#include "SearchScheduler.h"
#include "IndexLoaderQueue.h"
#include "SearchTaskQueue.h"
#include "utils/Log.h"
#include "utils/TimeRecorder.h"
#include "metrics/Metrics.h"
#include "db/EngineFactory.h"
namespace zilliz {
namespace milvus {
namespace engine {
namespace {
// Reports the size of a loaded file to the metrics collector. RAW and
// TO_INDEX files are counted as raw-file metrics; every other file type is
// counted as index-file metrics.
void CollectFileMetrics(int file_type, size_t file_size) {
    const bool is_raw_file = (file_type == meta::TableFileSchema::RAW)
                             || (file_type == meta::TableFileSchema::TO_INDEX);

    auto&& metrics = server::Metrics::GetInstance();
    if (is_raw_file) {
        metrics.RawFileSizeHistogramObserve(file_size);
        metrics.RawFileSizeTotalIncrement(file_size);
        metrics.RawFileSizeGaugeSet(file_size);
    } else {
        metrics.IndexFileSizeHistogramObserve(file_size);
        metrics.IndexFileSizeTotalIncrement(file_size);
        metrics.IndexFileSizeGaugeSet(file_size);
    }
}
// Records how long one search took. Searches over RAW or TO_INDEX files go
// to the raw-data histogram; everything else goes to the index-data
// histogram.
void CollectDurationMetrics(int index_type, double total_time) {
    switch (index_type) {
        case meta::TableFileSchema::RAW:
        case meta::TableFileSchema::TO_INDEX:
            server::Metrics::GetInstance().SearchRawDataDurationSecondsHistogramObserve(total_time);
            return;
        default:
            server::Metrics::GetInstance().SearchIndexDataDurationSecondsHistogramObserve(total_time);
            return;
    }
}
}
SearchScheduler::SearchScheduler()
: stopped_(true) {
Start();
}
// Shuts the worker threads down before the queues are destroyed.
SearchScheduler::~SearchScheduler() {
    Stop();
}
// Returns the process-wide scheduler, constructed (and started) on first use.
SearchScheduler& SearchScheduler::GetInstance() {
    static SearchScheduler scheduler;
    return scheduler;
}
// Launches the index-loading and searching worker threads unless they are
// already running. Always returns true.
bool
SearchScheduler::Start() {
    if (stopped_) {
        stopped_ = false;

        search_queue_.SetCapacity(2);

        index_load_thread_ = std::make_shared<std::thread>(&SearchScheduler::IndexLoadWorker, this);
        search_thread_ = std::make_shared<std::thread>(&SearchScheduler::SearchWorker, this);
    }

    return true;
}
// Stops both worker threads. Each worker exits when it takes a null item
// from its queue, so a null sentinel is enqueued and the thread is joined.
// Always returns true; calling it while already stopped does nothing.
bool
SearchScheduler::Stop() {
    if (!stopped_) {
        if (index_load_thread_) {
            index_load_queue_.Put(nullptr);
            index_load_thread_->join();
            index_load_thread_.reset();
        }

        if (search_thread_) {
            search_queue_.Put(nullptr);
            search_thread_->join();
            search_thread_.reset();
        }

        stopped_ = true;
    }

    return true;
}
// Submits a search request by placing its context on the index-load queue.
// Always returns true.
bool SearchScheduler::ScheduleSearchTask(SearchContextPtr& search_context) {
    index_load_queue_.Put(search_context);

    return true;
}
bool
SearchScheduler::IndexLoadWorker() {
while(true) {
IndexLoaderContextPtr context = index_load_queue_.Take();
if(context == nullptr) {
SERVER_LOG_INFO << "Stop thread for index loading";
break;//exit
}
SERVER_LOG_INFO << "Loading index(" << context->file_->id_ << ") from location: " << context->file_->location_;
server::TimeRecorder rc("Load index");
//step 1: load index
ExecutionEnginePtr index_ptr = EngineFactory::Build(context->file_->dimension_,
context->file_->location_,
(EngineType)context->file_->engine_type_);
index_ptr->Load();
rc.Record("load index file to memory");
size_t file_size = index_ptr->PhysicalSize();
LOG(DEBUG) << "Index file type " << context->file_->file_type_ << " Of Size: "
<< file_size/(1024*1024) << " M";
CollectFileMetrics(context->file_->file_type_, file_size);
//step 2: put search task into another queue
SearchTaskPtr task_ptr = std::make_shared<SearchTask>();
task_ptr->index_id_ = context->file_->id_;
task_ptr->index_type_ = context->file_->file_type_;
task_ptr->index_engine_ = index_ptr;
task_ptr->search_contexts_.swap(context->search_contexts_);
search_queue_.Put(task_ptr);
}
return true;
}
bool
SearchScheduler::SearchWorker() {
while(true) {
SearchTaskPtr task_ptr = search_queue_.Take();
if(task_ptr == nullptr) {
SERVER_LOG_INFO << "Stop thread for searching";
break;//exit
}
SERVER_LOG_INFO << "Searching in index(" << task_ptr->index_id_<< ") with "
<< task_ptr->search_contexts_.size() << " tasks";
//do search
auto start_time = METRICS_NOW_TIME;
task_ptr->DoSearch();
auto end_time = METRICS_NOW_TIME;
auto total_time = METRICS_MICROSECONDS(start_time, end_time);
CollectDurationMetrics(task_ptr->index_type_, total_time);
}
return true;
}
}
}
}

View File

@ -1,46 +0,0 @@
/*******************************************************************************
* Copyright (Zilliz) - All Rights Reserved
* Unauthorized copying of this file, via any medium is strictly prohibited.
* Proprietary and confidential.
******************************************************************************/
#pragma once
#include "SearchContext.h"
#include "IndexLoaderQueue.h"
#include "SearchTaskQueue.h"
namespace zilliz {
namespace milvus {
namespace engine {
// Singleton that owns two worker threads and the two queues that feed them.
// Construction and destruction are private; use GetInstance().
class SearchScheduler {
private:
SearchScheduler();
virtual ~SearchScheduler();
public:
// Accessor for the single scheduler instance.
static SearchScheduler& GetInstance();
// Submits a search context for scheduling.
bool ScheduleSearchTask(SearchContextPtr& search_context);
private:
// Lifecycle helpers; private, so only the class itself can call them.
bool Start();
bool Stop();
// Worker routines - presumably the bodies of index_load_thread_ and
// search_thread_ respectively; confirm against SearchScheduler.cpp.
bool IndexLoadWorker();
bool SearchWorker();
private:
std::shared_ptr<std::thread> index_load_thread_;
std::shared_ptr<std::thread> search_thread_;
IndexLoaderQueue index_load_queue_;
SearchTaskQueue search_queue_;
// Defaults to true: the scheduler is considered stopped until started.
bool stopped_ = true;
};
}
}
}

View File

@ -4,8 +4,8 @@
* Proprietary and confidential.
******************************************************************************/
#include "IndexLoaderQueue.h"
#include "ScheduleStrategy.h"
#include "TaskDispatchQueue.h"
#include "TaskDispatchStrategy.h"
#include "utils/Error.h"
#include "utils/Log.h"
@ -14,12 +14,12 @@ namespace milvus {
namespace engine {
void
IndexLoaderQueue::Put(const SearchContextPtr &search_context) {
TaskDispatchQueue::Put(const ScheduleContextPtr &context) {
std::unique_lock <std::mutex> lock(mtx);
full_.wait(lock, [this] { return (queue_.size() < capacity_); });
if(search_context == nullptr) {
queue_.push_back(nullptr);
if(context == nullptr) {
queue_.push_front(nullptr);
empty_.notify_all();
return;
}
@ -32,14 +32,13 @@ IndexLoaderQueue::Put(const SearchContextPtr &search_context) {
throw server::ServerException(server::SERVER_BLOCKING_QUEUE_EMPTY, error_msg);
}
ScheduleStrategyPtr strategy = StrategyFactory::CreateMemStrategy();
strategy->Schedule(search_context, queue_);
TaskDispatchStrategy::Schedule(context, queue_);
empty_.notify_all();
}
IndexLoaderContextPtr
IndexLoaderQueue::Take() {
ScheduleTaskPtr
TaskDispatchQueue::Take() {
std::unique_lock <std::mutex> lock(mtx);
empty_.wait(lock, [this] { return !queue_.empty(); });
@ -49,20 +48,20 @@ IndexLoaderQueue::Take() {
throw server::ServerException(server::SERVER_BLOCKING_QUEUE_EMPTY, error_msg);
}
IndexLoaderContextPtr front(queue_.front());
ScheduleTaskPtr front(queue_.front());
queue_.pop_front();
full_.notify_all();
return front;
}
size_t
IndexLoaderQueue::Size() {
TaskDispatchQueue::Size() {
std::lock_guard <std::mutex> lock(mtx);
return queue_.size();
}
IndexLoaderContextPtr
IndexLoaderQueue::Front() {
ScheduleTaskPtr
TaskDispatchQueue::Front() {
std::unique_lock <std::mutex> lock(mtx);
empty_.wait(lock, [this] { return !queue_.empty(); });
if (queue_.empty()) {
@ -70,12 +69,12 @@ IndexLoaderQueue::Front() {
SERVER_LOG_ERROR << error_msg;
throw server::ServerException(server::SERVER_BLOCKING_QUEUE_EMPTY, error_msg);
}
IndexLoaderContextPtr front(queue_.front());
ScheduleTaskPtr front(queue_.front());
return front;
}
IndexLoaderContextPtr
IndexLoaderQueue::Back() {
ScheduleTaskPtr
TaskDispatchQueue::Back() {
std::unique_lock <std::mutex> lock(mtx);
empty_.wait(lock, [this] { return !queue_.empty(); });
@ -85,18 +84,18 @@ IndexLoaderQueue::Back() {
throw server::ServerException(server::SERVER_BLOCKING_QUEUE_EMPTY, error_msg);
}
IndexLoaderContextPtr back(queue_.back());
ScheduleTaskPtr back(queue_.back());
return back;
}
bool
IndexLoaderQueue::Empty() {
TaskDispatchQueue::Empty() {
std::unique_lock <std::mutex> lock(mtx);
return queue_.empty();
}
void
IndexLoaderQueue::SetCapacity(const size_t capacity) {
TaskDispatchQueue::SetCapacity(const size_t capacity) {
capacity_ = (capacity > 0 ? capacity : capacity_);
}

View File

@ -5,7 +5,8 @@
******************************************************************************/
#pragma once
#include "SearchContext.h"
#include "context/IScheduleContext.h"
#include "task/IScheduleTask.h"
#include <condition_variable>
#include <iostream>
@ -17,31 +18,23 @@ namespace zilliz {
namespace milvus {
namespace engine {
class IndexLoaderContext {
class TaskDispatchQueue {
public:
TableFileSchemaPtr file_;
std::vector<SearchContextPtr> search_contexts_;
};
using IndexLoaderContextPtr = std::shared_ptr<IndexLoaderContext>;
TaskDispatchQueue() : mtx(), full_(), empty_() {}
class IndexLoaderQueue {
public:
IndexLoaderQueue() : mtx(), full_(), empty_() {}
TaskDispatchQueue(const TaskDispatchQueue &rhs) = delete;
IndexLoaderQueue(const IndexLoaderQueue &rhs) = delete;
TaskDispatchQueue &operator=(const TaskDispatchQueue &rhs) = delete;
IndexLoaderQueue &operator=(const IndexLoaderQueue &rhs) = delete;
using TaskList = std::list<ScheduleTaskPtr>;
using LoaderQueue = std::list<IndexLoaderContextPtr>;
void Put(const ScheduleContextPtr &context);
void Put(const SearchContextPtr &search_context);
ScheduleTaskPtr Take();
IndexLoaderContextPtr Take();
ScheduleTaskPtr Front();
IndexLoaderContextPtr Front();
IndexLoaderContextPtr Back();
ScheduleTaskPtr Back();
size_t Size();
@ -54,7 +47,7 @@ private:
std::condition_variable full_;
std::condition_variable empty_;
LoaderQueue queue_;
TaskList queue_;
size_t capacity_ = 1000000;
};

View File

@ -0,0 +1,122 @@
/*******************************************************************************
* Copyright (Zilliz) - All Rights Reserved
* Unauthorized copying of this file, via any medium is strictly prohibited.
* Proprietary and confidential.
******************************************************************************/
#include "TaskDispatchStrategy.h"
#include "context/SearchContext.h"
#include "context/DeleteContext.h"
#include "task/IndexLoadTask.h"
#include "task/DeleteTask.h"
#include "cache/CpuCacheMgr.h"
#include "utils/Error.h"
#include "db/Log.h"
namespace zilliz {
namespace milvus {
namespace engine {
class ReuseCacheIndexStrategy {
public:
bool Schedule(const SearchContextPtr &context, std::list<ScheduleTaskPtr>& task_list) {
if(context == nullptr) {
return false;
}
SearchContext::Id2IndexMap index_files = context->GetIndexMap();
//some index loader alread exists
for(auto& task : task_list) {
if(task->type() != ScheduleTaskType::kIndexLoad) {
continue;
}
IndexLoadTaskPtr loader = std::static_pointer_cast<IndexLoadTask>(task);
if(index_files.find(loader->file_->id_) != index_files.end()){
ENGINE_LOG_INFO << "Append SearchContext to exist IndexLoaderContext";
index_files.erase(loader->file_->id_);
loader->search_contexts_.push_back(context);
}
}
//index_files still contains some index files, create new loader
for(auto& pair : index_files) {
ENGINE_LOG_INFO << "Create new IndexLoaderContext for: " << pair.second->location_;
IndexLoadTaskPtr new_loader = std::make_shared<IndexLoadTask>();
new_loader->search_contexts_.push_back(context);
new_loader->file_ = pair.second;
auto index = zilliz::milvus::cache::CpuCacheMgr::GetInstance()->GetIndex(pair.second->location_);
if(index != nullptr) {
//if the index file has been in memory, increase its priority
task_list.push_front(new_loader);
} else {
//index file not in memory, put it to tail
task_list.push_back(new_loader);
}
}
return true;
}
};
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
class DeleteTableStrategy {
public:
bool Schedule(const DeleteContextPtr &context, std::list<ScheduleTaskPtr> &task_list) {
if (context == nullptr) {
return false;
}
DeleteTaskPtr delete_task = std::make_shared<DeleteTask>(context);
if(task_list.empty()) {
task_list.push_back(delete_task);
return true;
}
std::string table_id = context->table_id();
for(auto iter = task_list.begin(); iter != task_list.end(); ++iter) {
if((*iter)->type() != ScheduleTaskType::kIndexLoad) {
continue;
}
//put delete task to proper position
IndexLoadTaskPtr loader = std::static_pointer_cast<IndexLoadTask>(*iter);
if(loader->file_->table_id_ == table_id) {
task_list.insert(++iter, delete_task);
break;
}
}
return true;
}
};
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
bool TaskDispatchStrategy::Schedule(const ScheduleContextPtr &context_ptr,
std::list<zilliz::milvus::engine::ScheduleTaskPtr> &task_list) {
if(context_ptr == nullptr) {
return false;
}
switch(context_ptr->type()) {
case ScheduleContextType::kSearch: {
SearchContextPtr search_context = std::static_pointer_cast<SearchContext>(context_ptr);
ReuseCacheIndexStrategy strategy;
return strategy.Schedule(search_context, task_list);
}
case ScheduleContextType::kDelete: {
DeleteContextPtr delete_context = std::static_pointer_cast<DeleteContext>(context_ptr);
DeleteTableStrategy strategy;
return strategy.Schedule(delete_context, task_list);
}
default:
ENGINE_LOG_ERROR << "Invalid schedule task type";
return false;
}
}
}
}
}

View File

@ -5,18 +5,18 @@
******************************************************************************/
#pragma once
#include "IScheduleStrategy.h"
#include "context/IScheduleContext.h"
#include "task/IScheduleTask.h"
#include <list>
namespace zilliz {
namespace milvus {
namespace engine {
class StrategyFactory {
private:
StrategyFactory() {}
class TaskDispatchStrategy {
public:
static ScheduleStrategyPtr CreateMemStrategy();
static bool Schedule(const ScheduleContextPtr &context_ptr, std::list<ScheduleTaskPtr>& task_list);
};
}

View File

@ -0,0 +1,117 @@
/*******************************************************************************
* Copyright (Zilliz) - All Rights Reserved
* Unauthorized copying of this file, via any medium is strictly prohibited.
* Proprietary and confidential.
******************************************************************************/
#include "TaskScheduler.h"
#include "TaskDispatchQueue.h"
#include "utils/Log.h"
#include "utils/TimeRecorder.h"
#include "db/EngineFactory.h"
namespace zilliz {
namespace milvus {
namespace engine {
// Begins in the stopped state and starts the worker threads right away.
TaskScheduler::TaskScheduler() : stopped_(true) {
    Start();
}
// Shuts the worker threads down before the members are destroyed.
TaskScheduler::~TaskScheduler() {
    Stop();
}
// Returns the single scheduler instance, created on first call as a
// function-local static.
TaskScheduler& TaskScheduler::GetInstance() {
    static TaskScheduler scheduler_instance;
    return scheduler_instance;
}
// Launches the dispatch thread and the task thread if the scheduler is
// currently stopped; a no-op when it is already running. Always returns true.
bool
TaskScheduler::Start() {
    if(stopped_) {
        stopped_ = false;

        task_queue_.SetCapacity(2);

        task_dispatch_thread_ = std::make_shared<std::thread>(&TaskScheduler::TaskDispatchWorker, this);
        task_thread_ = std::make_shared<std::thread>(&TaskScheduler::TaskWorker, this);
    }

    return true;
}
// Stops both worker threads: each one is sent a null entry through its queue
// (the workers exit when they take a null task) and is then joined.
// A no-op when the scheduler is already stopped. Always returns true.
bool
TaskScheduler::Stop() {
    if(!stopped_) {
        if(task_dispatch_thread_ != nullptr) {
            task_dispatch_queue_.Put(nullptr);
            task_dispatch_thread_->join();
            task_dispatch_thread_.reset();
        }

        if(task_thread_ != nullptr) {
            task_queue_.Put(nullptr);
            task_thread_->join();
            task_thread_.reset();
        }

        stopped_ = true;
    }

    return true;
}
// Hands a schedule context over to the dispatch queue. Always returns true.
bool
TaskScheduler::Schedule(ScheduleContextPtr context) {
    task_dispatch_queue_.Put(context);

    return true;
}
bool
TaskScheduler::TaskDispatchWorker() {
while(true) {
ScheduleTaskPtr task_ptr = task_dispatch_queue_.Take();
if(task_ptr == nullptr) {
SERVER_LOG_INFO << "Stop db task dispatch thread";
break;//exit
}
//execute task
ScheduleTaskPtr next_task = task_ptr->Execute();
if(next_task != nullptr) {
task_queue_.Put(next_task);
}
}
return true;
}
// Body of the task thread: takes tasks from task_queue_ and executes them.
// A null task ends the loop. Always returns true.
bool
TaskScheduler::TaskWorker() {
    while(true) {
        ScheduleTaskPtr task_ptr = task_queue_.Take();
        if(task_ptr == nullptr) {
            SERVER_LOG_INFO << "Stop db task thread";
            break;//exit
        }

        //execute the task and any follow-up tasks it returns, inline.
        //This thread is the only consumer of task_queue_ (capacity is set to 2
        //in Start()), so putting a follow-up back into the queue could block
        //forever once the queue is full: nobody else would take from it.
        while(task_ptr != nullptr) {
            task_ptr = task_ptr->Execute();
        }
    }

    return true;
}
}
}
}

View File

@ -0,0 +1,49 @@
/*******************************************************************************
* Copyright (Zilliz) - All Rights Reserved
* Unauthorized copying of this file, via any medium is strictly prohibited.
* Proprietary and confidential.
******************************************************************************/
#pragma once
#include "context/IScheduleContext.h"
#include "task/IScheduleTask.h"
#include "TaskDispatchQueue.h"
#include "utils/BlockingQueue.h"
namespace zilliz {
namespace milvus {
namespace engine {
// Scheduler that owns two worker threads and the queues feeding them.
// Instances cannot be created directly (private constructor); use GetInstance().
class TaskScheduler {
private:
// Private: the constructor calls Start(), the destructor calls Stop().
TaskScheduler();
virtual ~TaskScheduler();
public:
// Returns the scheduler instance.
static TaskScheduler& GetInstance();
// Puts a schedule context into the dispatch queue; returns true.
bool Schedule(ScheduleContextPtr context);
private:
// Start/stop the two worker threads.
bool Start();
bool Stop();
// Thread bodies: TaskDispatchWorker drains task_dispatch_queue_,
// TaskWorker drains task_queue_.
bool TaskDispatchWorker();
bool TaskWorker();
private:
std::shared_ptr<std::thread> task_dispatch_thread_;
std::shared_ptr<std::thread> task_thread_;
// Receives schedule contexts from Schedule() and yields tasks to the dispatch thread.
TaskDispatchQueue task_dispatch_queue_;
using TaskQueue = server::BlockingQueue<ScheduleTaskPtr>;
// Receives follow-up tasks produced by executed tasks.
TaskQueue task_queue_;
// True while the worker threads are not running.
bool stopped_ = true;
};
}
}
}

View File

@ -0,0 +1,22 @@
/*******************************************************************************
* Copyright (Zilliz) - All Rights Reserved
* Unauthorized copying of this file, via any medium is strictly prohibited.
* Proprietary and confidential.
******************************************************************************/
#include "DeleteContext.h"
namespace zilliz {
namespace milvus {
namespace engine {
// Creates a delete-type schedule context for the given table, keeping a copy
// of the table id and of the meta pointer.
DeleteContext::DeleteContext(const std::string& table_id, meta::Meta::Ptr& meta_ptr)
    : IScheduleContext(ScheduleContextType::kDelete), table_id_(table_id), meta_ptr_(meta_ptr) {
}
}
}
}

View File

@ -0,0 +1,31 @@
/*******************************************************************************
* Copyright (Zilliz) - All Rights Reserved
* Unauthorized copying of this file, via any medium is strictly prohibited.
* Proprietary and confidential.
******************************************************************************/
#pragma once
#include "IScheduleContext.h"
#include "db/Meta.h"
namespace zilliz {
namespace milvus {
namespace engine {
// Schedule context describing a table-deletion request: the id of the table
// and the meta object used to carry the deletion out.
class DeleteContext : public IScheduleContext {
public:
// Stores copies of table_id and meta_ptr; the context type is kDelete.
DeleteContext(const std::string& table_id, meta::Meta::Ptr& meta_ptr);
// Id of the table this context refers to (returned by value).
std::string table_id() const { return table_id_; }
// Meta pointer supplied at construction (returned by value).
meta::Meta::Ptr meta() const { return meta_ptr_; }
private:
std::string table_id_;
meta::Meta::Ptr meta_ptr_;
};
using DeleteContextPtr = std::shared_ptr<DeleteContext>;
}
}
}

View File

@ -0,0 +1,38 @@
/*******************************************************************************
* Copyright (Zilliz) - All Rights Reserved
* Unauthorized copying of this file, via any medium is strictly prohibited.
* Proprietary and confidential.
******************************************************************************/
#pragma once
#include <memory>
namespace zilliz {
namespace milvus {
namespace engine {
// Kind of request a schedule context carries.
enum class ScheduleContextType {
    kUnknown = 0,
    kSearch,
    kDelete,
};

// Base class of all schedule contexts; stores the context type so that
// consumers can tell which derived class they are looking at.
class IScheduleContext {
public:
    // explicit: a bare ScheduleContextType value must not silently convert
    // into a context object.
    explicit IScheduleContext(ScheduleContextType type)
        : type_(type) {
    }

    virtual ~IScheduleContext() = default;

    // Type given at construction.
    ScheduleContextType type() const { return type_; }

protected:
    ScheduleContextType type_;
};

using ScheduleContextPtr = std::shared_ptr<IScheduleContext>;
}
}
}

View File

@ -14,7 +14,8 @@ namespace milvus {
namespace engine {
SearchContext::SearchContext(uint64_t topk, uint64_t nq, const float* vectors)
: topk_(topk),
: IScheduleContext(ScheduleContextType::kSearch),
topk_(topk),
nq_(nq),
vectors_(vectors) {
//use current time to identify this context

View File

@ -5,6 +5,7 @@
******************************************************************************/
#pragma once
#include "IScheduleContext.h"
#include "db/MetaTypes.h"
#include <unordered_map>
@ -18,7 +19,7 @@ namespace engine {
using TableFileSchemaPtr = std::shared_ptr<meta::TableFileSchema>;
class SearchContext {
class SearchContext : public IScheduleContext {
public:
SearchContext(uint64_t topk, uint64_t nq, const float* vectors);

View File

@ -0,0 +1,30 @@
/*******************************************************************************
* Copyright (Zilliz) - All Rights Reserved
* Unauthorized copying of this file, via any medium is strictly prohibited.
* Proprietary and confidential.
******************************************************************************/
#include "DeleteTask.h"
namespace zilliz {
namespace milvus {
namespace engine {
// Creates a delete-type task bound to the given delete context.
DeleteTask::DeleteTask(const DeleteContextPtr& context)
    : IScheduleTask(ScheduleTaskType::kDelete), context_(context) {
}
// Asks the context's meta object to delete the files of the context's table.
// Does nothing when the context or its meta pointer is null.
// Always returns nullptr (no follow-up task).
std::shared_ptr<IScheduleTask> DeleteTask::Execute() {
    if(context_ == nullptr) {
        return nullptr;
    }

    auto meta_ptr = context_->meta();
    if(meta_ptr != nullptr) {
        meta_ptr->DeleteTableFiles(context_->table_id());
    }

    return nullptr;
}
}
}
}

View File

@ -5,22 +5,25 @@
******************************************************************************/
#pragma once
#include "IndexLoaderQueue.h"
#include "SearchContext.h"
#include "IScheduleTask.h"
#include "db/scheduler/context/DeleteContext.h"
namespace zilliz {
namespace milvus {
namespace engine {
class IScheduleStrategy {
class DeleteTask : public IScheduleTask {
public:
virtual ~IScheduleStrategy() {}
DeleteTask(const DeleteContextPtr& context);
virtual bool Schedule(const SearchContextPtr &search_context, IndexLoaderQueue::LoaderQueue& loader_list) = 0;
virtual std::shared_ptr<IScheduleTask> Execute() override;
private:
DeleteContextPtr context_;
};
using ScheduleStrategyPtr = std::shared_ptr<IScheduleStrategy>;
using DeleteTaskPtr = std::shared_ptr<DeleteTask>;
}
}
}
}

View File

@ -0,0 +1,41 @@
/*******************************************************************************
* Copyright (Zilliz) - All Rights Reserved
* Unauthorized copying of this file, via any medium is strictly prohibited.
* Proprietary and confidential.
******************************************************************************/
#pragma once
#include <memory>
namespace zilliz {
namespace milvus {
namespace engine {
// Kind of work a schedule task performs.
enum class ScheduleTaskType {
    kUnknown = 0,
    kIndexLoad = 1,
    kSearch = 2,
    kDelete = 3,
};

// Interface of a unit of work run by the scheduler threads.
class IScheduleTask {
public:
    IScheduleTask(ScheduleTaskType type) : type_(type) {}

    virtual ~IScheduleTask() = default;

    // Type given at construction.
    ScheduleTaskType type() const {
        return type_;
    }

    // Runs the task. The returned pointer is the follow-up task, or null when
    // there is none.
    virtual std::shared_ptr<IScheduleTask> Execute() = 0;

protected:
    ScheduleTaskType type_;
};

using ScheduleTaskPtr = std::shared_ptr<IScheduleTask>;
}
}
}

View File

@ -0,0 +1,72 @@
/*******************************************************************************
* Copyright (Zilliz) - All Rights Reserved
* Unauthorized copying of this file, via any medium is strictly prohibited.
* Proprietary and confidential.
******************************************************************************/
#include "IndexLoadTask.h"
#include "SearchTask.h"
#include "db/Log.h"
#include "db/EngineFactory.h"
#include "utils/TimeRecorder.h"
#include "metrics/Metrics.h"
namespace zilliz {
namespace milvus {
namespace engine {
namespace {

// Reports the size of a loaded file to the metrics instance: RAW and TO_INDEX
// files go to the "raw file" metrics, every other file type to the
// "index file" metrics.
void CollectFileMetrics(int file_type, size_t file_size) {
    const bool counts_as_raw = (file_type == meta::TableFileSchema::RAW)
                               || (file_type == meta::TableFileSchema::TO_INDEX);

    if(counts_as_raw) {
        server::Metrics::GetInstance().RawFileSizeHistogramObserve(file_size);
        server::Metrics::GetInstance().RawFileSizeTotalIncrement(file_size);
        server::Metrics::GetInstance().RawFileSizeGaugeSet(file_size);
        return;
    }

    server::Metrics::GetInstance().IndexFileSizeHistogramObserve(file_size);
    server::Metrics::GetInstance().IndexFileSizeTotalIncrement(file_size);
    server::Metrics::GetInstance().IndexFileSizeGaugeSet(file_size);
}

}
// Creates an index-load task; file_ and search_contexts_ are filled in by the caller.
IndexLoadTask::IndexLoadTask() : IScheduleTask(ScheduleTaskType::kIndexLoad) {
}
std::shared_ptr<IScheduleTask> IndexLoadTask::Execute() {
ENGINE_LOG_INFO << "Loading index(" << file_->id_ << ") from location: " << file_->location_;
server::TimeRecorder rc("Load index");
//step 1: load index
ExecutionEnginePtr index_ptr = EngineFactory::Build(file_->dimension_,
file_->location_,
(EngineType)file_->engine_type_);
index_ptr->Load();
rc.Record("load index file to memory");
size_t file_size = index_ptr->PhysicalSize();
LOG(DEBUG) << "Index file type " << file_->file_type_ << " Of Size: "
<< file_size/(1024*1024) << " M";
CollectFileMetrics(file_->file_type_, file_size);
//step 2: return search task for later execution
SearchTaskPtr task_ptr = std::make_shared<SearchTask>();
task_ptr->index_id_ = file_->id_;
task_ptr->index_type_ = file_->file_type_;
task_ptr->index_engine_ = index_ptr;
task_ptr->search_contexts_.swap(search_contexts_);
return std::static_pointer_cast<IScheduleTask>(task_ptr);
}
}
}
}

View File

@ -0,0 +1,30 @@
/*******************************************************************************
* Copyright (Zilliz) - All Rights Reserved
* Unauthorized copying of this file, via any medium is strictly prohibited.
* Proprietary and confidential.
******************************************************************************/
#pragma once
#include "IScheduleTask.h"
#include "db/scheduler/context/SearchContext.h"
namespace zilliz {
namespace milvus {
namespace engine {
// Task that loads one index file and produces the search task for it.
class IndexLoadTask : public IScheduleTask {
public:
IndexLoadTask();
// Loads the file described by file_ and returns the follow-up task.
virtual std::shared_ptr<IScheduleTask> Execute() override;
public:
// Description of the file to load.
TableFileSchemaPtr file_;
// Search contexts waiting for this file.
std::vector<SearchContextPtr> search_contexts_;
};
using IndexLoadTaskPtr = std::shared_ptr<IndexLoadTask>;
}
}
}

View File

@ -3,7 +3,8 @@
* Unauthorized copying of this file, via any medium is strictly prohibited.
* Proprietary and confidential.
******************************************************************************/
#include "SearchTaskQueue.h"
#include "SearchTask.h"
#include "metrics/Metrics.h"
#include "utils/Log.h"
#include "utils/TimeRecorder.h"
@ -18,10 +19,15 @@ void ClusterResult(const std::vector<long> &output_ids,
uint64_t topk,
SearchContext::ResultSet &result_set) {
result_set.clear();
result_set.reserve(nq);
for (auto i = 0; i < nq; i++) {
SearchContext::Id2ScoreMap id_score;
id_score.reserve(topk);
for (auto k = 0; k < topk; k++) {
uint64_t index = i * topk + k;
if(output_ids[index] < 0) {
continue;
}
id_score.push_back(std::make_pair(output_ids[index], output_distence[index]));
}
result_set.emplace_back(id_score);
@ -29,20 +35,60 @@ void ClusterResult(const std::vector<long> &output_ids,
}
void MergeResult(SearchContext::Id2ScoreMap &score_src,
SearchContext::Id2ScoreMap &score_target,
uint64_t topk) {
for (auto& pair_src : score_src) {
for (auto iter = score_target.begin(); iter != score_target.end(); ++iter) {
if(pair_src.second > iter->second) {
score_target.insert(iter, pair_src);
SearchContext::Id2ScoreMap &score_target,
uint64_t topk) {
//Note: the score_src and score_target are already arranged by score in ascending order
if(score_src.empty()) {
return;
}
if(score_target.empty()) {
score_target.swap(score_src);
return;
}
size_t src_count = score_src.size();
size_t target_count = score_target.size();
SearchContext::Id2ScoreMap score_merged;
score_merged.reserve(topk);
size_t src_index = 0, target_index = 0;
while(true) {
//all score_src items are merged, if score_merged.size() still less than topk
//move items from score_target to score_merged until score_merged.size() equal topk
if(src_index >= src_count) {
for(size_t i = target_index; i < target_count && score_merged.size() < topk; ++i) {
score_merged.push_back(score_target[i]);
}
break;
}
//all score_target items are merged, if score_merged.size() still less than topk
//move items from score_src to score_merged until score_merged.size() equal topk
if(target_index >= target_count) {
for(size_t i = src_index; i < src_count && score_merged.size() < topk; ++i) {
score_merged.push_back(score_src[i]);
}
break;
}
//compare score, put smallest score to score_merged one by one
auto& src_pair = score_src[src_index];
auto& target_pair = score_target[target_index];
if(src_pair.second > target_pair.second) {
score_merged.push_back(target_pair);
target_index++;
} else {
score_merged.push_back(src_pair);
src_index++;
}
//score_merged.size() already equal topk
if(score_merged.size() >= topk) {
break;
}
}
//remove unused items
while (score_target.size() > topk) {
score_target.pop_back();
}
score_target.swap(score_merged);
}
void TopkResult(SearchContext::ResultSet &result_src,
@ -65,42 +111,42 @@ void TopkResult(SearchContext::ResultSet &result_src,
}
}
void CalcScore(uint64_t vector_count,
const float *vectors_data,
uint64_t dimension,
const SearchContext::ResultSet &result_src,
SearchContext::ResultSet &result_target) {
result_target.clear();
if(result_src.empty()){
return;
}
int vec_index = 0;
for(auto& result : result_src) {
const float * vec_data = vectors_data + vec_index*dimension;
double vec_len = 0;
for(uint64_t i = 0; i < dimension; i++) {
vec_len += vec_data[i]*vec_data[i];
void CollectDurationMetrics(int index_type, double total_time) {
switch(index_type) {
case meta::TableFileSchema::RAW: {
server::Metrics::GetInstance().SearchRawDataDurationSecondsHistogramObserve(total_time);
break;
}
vec_index++;
SearchContext::Id2ScoreMap score_array;
for(auto& pair : result) {
score_array.push_back(std::make_pair(pair.first, (1 - pair.second/vec_len)*100.0));
case meta::TableFileSchema::TO_INDEX: {
server::Metrics::GetInstance().SearchRawDataDurationSecondsHistogramObserve(total_time);
break;
}
default: {
server::Metrics::GetInstance().SearchIndexDataDurationSecondsHistogramObserve(total_time);
break;
}
result_target.emplace_back(score_array);
}
}
}
bool SearchTask::DoSearch() {
SearchTask::SearchTask()
: IScheduleTask(ScheduleTaskType::kSearch) {
}
std::shared_ptr<IScheduleTask> SearchTask::Execute() {
if(index_engine_ == nullptr) {
return false;
return nullptr;
}
SERVER_LOG_INFO << "Searching in index(" << index_id_<< ") with "
<< search_contexts_.size() << " tasks";
server::TimeRecorder rc("DoSearch index(" + std::to_string(index_id_) + ")");
auto start_time = METRICS_NOW_TIME;
std::vector<long> output_ids;
std::vector<float> output_distence;
for(auto& context : search_contexts_) {
@ -109,8 +155,8 @@ bool SearchTask::DoSearch() {
output_ids.resize(inner_k*context->nq());
output_distence.resize(inner_k*context->nq());
//step 2: search
try {
//step 2: search
index_engine_->Search(context->nq(), context->vectors(), inner_k, output_distence.data(),
output_ids.data());
@ -125,24 +171,23 @@ bool SearchTask::DoSearch() {
TopkResult(result_set, inner_k, context->GetResult());
rc.Record("reduce topk");
//step 5: calculate score between 0 ~ 100
CalcScore(context->nq(), context->vectors(), index_engine_->Dimension(), context->GetResult(), result_set);
context->GetResult().swap(result_set);
rc.Record("calculate score");
} catch (std::exception& ex) {
SERVER_LOG_ERROR << "SearchTask encounter exception: " << ex.what();
context->IndexSearchDone(index_id_);//mark as done avoid dead lock, even search failed
continue;
}
//step 6: notify to send result to client
//step 5: notify to send result to client
context->IndexSearchDone(index_id_);
}
auto end_time = METRICS_NOW_TIME;
auto total_time = METRICS_MICROSECONDS(start_time, end_time);
CollectDurationMetrics(index_type_, total_time);
rc.Elapse("totally cost");
return true;
return nullptr;
}
}

View File

@ -5,19 +5,19 @@
******************************************************************************/
#pragma once
#include "SearchContext.h"
#include "utils/BlockingQueue.h"
#include "IScheduleTask.h"
#include "db/scheduler/context/SearchContext.h"
#include "db/ExecutionEngine.h"
#include <memory>
namespace zilliz {
namespace milvus {
namespace engine {
class SearchTask {
class SearchTask : public IScheduleTask {
public:
bool DoSearch();
SearchTask();
virtual std::shared_ptr<IScheduleTask> Execute() override;
public:
size_t index_id_ = 0;
@ -27,7 +27,6 @@ public:
};
using SearchTaskPtr = std::shared_ptr<SearchTask>;
using SearchTaskQueue = server::BlockingQueue<SearchTaskPtr>;
}

View File

@ -47,27 +47,27 @@ LicenseCheck::LegalityCheck(const std::string &license_file_path) {
end_time);
if(err !=SERVER_SUCCESS)
{
printf("License check error: 01\n");
std::cout << "License check error: 01" << std::endl;
return SERVER_UNEXPECTED_ERROR;
}
time_t system_time;
LicenseLibrary::GetSystemTime(system_time);
if (device_count != output_device_count) {
printf("License check error: 02\n");
std::cout << "License check error: 02" << std::endl;
return SERVER_UNEXPECTED_ERROR;
}
for (int i = 0; i < device_count; ++i) {
if (sha_array[i] != uuid_encryption_map[i]) {
printf("License check error: 03\n");
std::cout << "License check error: 03" << std::endl;
return SERVER_UNEXPECTED_ERROR;
}
}
if (system_time < starting_time || system_time > end_time) {
printf("License check error: 04\n");
std::cout << "License check error: 04" << std::endl;
return SERVER_UNEXPECTED_ERROR;
}
printf("Legality Check Success\n");
std::cout << "Legality Check Success" << std::endl;
return SERVER_SUCCESS;
}
@ -80,11 +80,11 @@ LicenseCheck::AlterFile(const std::string &license_file_path,
ServerError err = LicenseCheck::LegalityCheck(license_file_path);
if(err!=SERVER_SUCCESS) {
printf("license file check error\n");
std::cout << "license file check error" << std::endl;
exit(1);
}
printf("---runing---\n");
std::cout << "---runing---" << std::endl;
pt->expires_at(pt->expires_at() + boost::posix_time::hours(1));
pt->async_wait(boost::bind(LicenseCheck::AlterFile, license_file_path, boost::asio::placeholders::error, pt));
@ -96,7 +96,7 @@ ServerError
LicenseCheck::StartCountingDown(const std::string &license_file_path) {
if (!LicenseLibrary::IsFileExistent(license_file_path)) {
printf("license file not exist\n");
std::cout << "license file not exist" << std::endl;
exit(1);
}

View File

@ -26,8 +26,8 @@ using namespace zilliz::milvus;
int
main(int argc, char *argv[]) {
printf("\nWelcome to use Milvus by Zillz!\n");
printf("Milvus %s version: v%s built at %s\n", BUILD_TYPE, MEGASEARCH_VERSION, BUILD_TIME);
std::cout << std::endl << "Welcome to use Milvus by Zillz!" << std::endl;
std::cout << "Milvus " << BUILD_TYPE << " version: v" << MILVUS_VERSION << " built at " << BUILD_TIME << std::endl;
signal(SIGINT, server::SignalUtil::HandleSignal);
signal(SIGSEGV, server::SignalUtil::HandleSignal);
@ -53,7 +53,7 @@ main(int argc, char *argv[]) {
if(argc < 2) {
print_help(app_name);
printf("Milvus server exit...\n");
std::cout << "Milvus server exit..." << std::endl;
return EXIT_FAILURE;
}
@ -64,14 +64,14 @@ main(int argc, char *argv[]) {
char *config_filename_ptr = strdup(optarg);
config_filename = config_filename_ptr;
free(config_filename_ptr);
printf("Loading configuration from: %s\n", config_filename.c_str());
std::cout << "Loading configuration from: " << config_filename << std::endl;
break;
}
case 'l': {
char *log_filename_ptr = strdup(optarg);
log_config_file = log_filename_ptr;
free(log_filename_ptr);
printf("Initial log config from: %s\n", log_config_file.c_str());
std::cout << "Initial log config from: " << log_config_file << std::endl;
break;
}
@ -79,7 +79,7 @@ main(int argc, char *argv[]) {
char *pid_filename_ptr = strdup(optarg);
pid_filename = pid_filename_ptr;
free(pid_filename_ptr);
printf("%s\n", pid_filename.c_str());
std::cout << pid_filename << std::endl;
break;
}
@ -107,11 +107,11 @@ main(int argc, char *argv[]) {
void
print_help(const std::string &app_name) {
printf("\n Usage: %s [OPTIONS]\n\n", app_name.c_str());
printf(" Options:\n");
printf(" -h --help Print this help\n");
printf(" -c --conf_file filename Read configuration from the file\n");
printf(" -d --daemon Daemonize this application\n");
printf(" -p --pid_file filename PID file used by daemonized app\n");
printf("\n");
std::cout << std::endl<< "Usage: " << app_name << " [OPTIONS]" << std::endl << std::endl;
std::cout << " Options:" << std::endl;
std::cout << " -h --help Print this help" << std::endl;
std::cout << " -c --conf_file filename Read configuration from the file" << std::endl;
std::cout << " -d --daemon Daemonize this application" << std::endl;
std::cout << " -p --pid_file filename PID file used by daemonized app" << std::endl;
std::cout << std::endl;
}

View File

@ -14,20 +14,22 @@ include_directories(/usr/include)
include_directories(${CMAKE_SOURCE_DIR}/src/thrift/gen-cpp)
set(service_files
${CMAKE_SOURCE_DIR}/src/thrift/gen-cpp/MegasearchService.cpp
${CMAKE_SOURCE_DIR}/src/thrift/gen-cpp/megasearch_constants.cpp
${CMAKE_SOURCE_DIR}/src/thrift/gen-cpp/megasearch_types.cpp
${CMAKE_SOURCE_DIR}/src/thrift/gen-cpp/MilvusService.cpp
${CMAKE_SOURCE_DIR}/src/thrift/gen-cpp/milvus_constants.cpp
${CMAKE_SOURCE_DIR}/src/thrift/gen-cpp/milvus_types.cpp
)
add_library(megasearch_sdk STATIC
add_library(milvus_sdk STATIC
${interface_files}
${client_files}
${util_files}
${service_files}
)
target_link_libraries(megasearch_sdk
target_link_libraries(milvus_sdk
${third_party_libs}
)
add_subdirectory(examples)
# Install the milvus_sdk static library under <prefix>/bin.
# NOTE(review): static archives conventionally go to lib - confirm that bin is intended.
install(TARGETS milvus_sdk DESTINATION bin)

View File

@ -7,9 +7,9 @@
aux_source_directory(src src_files)
include_directories(src)
include_directories(../../megasearch_sdk/include)
include_directories(../../include)
link_directories(${CMAKE_BINARY_DIR}/megasearch_sdk)
link_directories(${CMAKE_BINARY_DIR})
add_executable(sdk_simple
./main.cpp
@ -17,6 +17,8 @@ add_executable(sdk_simple
)
target_link_libraries(sdk_simple
megasearch_sdk
milvus_sdk
pthread
)
install(TARGETS sdk_simple DESTINATION bin)

View File

@ -25,7 +25,7 @@ main(int argc, char *argv[]) {
{NULL, 0, 0, 0}};
int option_index = 0;
std::string address = "127.0.0.1", port = "33001";
std::string address = "127.0.0.1", port = "19530";
app_name = argv[0];
int value;
@ -62,7 +62,7 @@ print_help(const std::string &app_name) {
printf("\n Usage: %s [OPTIONS]\n\n", app_name.c_str());
printf(" Options:\n");
printf(" -s --server Server address, default 127.0.0.1\n");
printf(" -p --port Server port, default 33001\n");
printf(" -p --port Server port, default 19530\n");
printf(" -h --help Print help information\n");
printf("\n");
}

View File

@ -4,26 +4,28 @@
* Proprietary and confidential.
******************************************************************************/
#include "ClientTest.h"
#include "MegaSearch.h"
#include "MilvusApi.h"
#include <iostream>
#include <time.h>
#include <unistd.h>
using namespace megasearch;
using namespace ::milvus;
namespace {
std::string GetTableName();
static const std::string TABLE_NAME = GetTableName();
static constexpr int64_t TABLE_DIMENSION = 512;
static constexpr int64_t TOTAL_ROW_COUNT = 100000;
static constexpr int64_t BATCH_ROW_COUNT = 100000;
static constexpr int64_t NQ = 10;
static constexpr int64_t TOP_K = 10;
static constexpr int64_t SEARCH_TARGET = 5000; //change this value, result is different
static constexpr int64_t ADD_VECTOR_LOOP = 5;
#define BLOCK_SPLITER std::cout << "===========================================" << std::endl;
void PrintTableSchema(const megasearch::TableSchema& tb_schema) {
void PrintTableSchema(const TableSchema& tb_schema) {
BLOCK_SPLITER
std::cout << "Table name: " << tb_schema.table_name << std::endl;
std::cout << "Table index type: " << (int)tb_schema.index_type << std::endl;
@ -95,7 +97,7 @@ namespace {
TableSchema BuildTableSchema() {
TableSchema tb_schema;
tb_schema.table_name = TABLE_NAME;
tb_schema.index_type = IndexType::gpu_ivfflat;
tb_schema.index_type = IndexType::cpu_idmap;
tb_schema.dimension = TABLE_DIMENSION;
tb_schema.store_raw_vector = true;
@ -109,17 +111,21 @@ namespace {
}
vector_record_array.clear();
for (int64_t k = from; k < to; k++) {
RowRecord record;
record.data.resize(TABLE_DIMENSION);
for(int64_t i = 0; i < TABLE_DIMENSION; i++) {
record.data[i] = (float)(i + k);
record.data[i] = (float)(k%(i+1));
}
vector_record_array.emplace_back(record);
}
}
void Sleep(int seconds) {
std::cout << "Waiting " << seconds << " seconds ..." << std::endl;
sleep(seconds);
}
}
void
@ -134,7 +140,7 @@ ClientTest::Test(const std::string& address, const std::string& port) {
{//server version
std::string version = conn->ServerVersion();
std::cout << "MegaSearch server version: " << version << std::endl;
std::cout << "Server version: " << version << std::endl;
}
{//sdk version
@ -156,9 +162,9 @@ ClientTest::Test(const std::string& address, const std::string& port) {
{//create table
TableSchema tb_schema = BuildTableSchema();
PrintTableSchema(tb_schema);
Status stat = conn->CreateTable(tb_schema);
std::cout << "CreateTable function call status: " << stat.ToString() << std::endl;
PrintTableSchema(tb_schema);
}
{//describe table
@ -168,9 +174,9 @@ ClientTest::Test(const std::string& address, const std::string& port) {
PrintTableSchema(tb_schema);
}
{//add vectors
for(int i = 0; i < ADD_VECTOR_LOOP; i++){//add vectors
std::vector<RowRecord> record_array;
BuildVectors(0, TOTAL_ROW_COUNT, record_array);
BuildVectors(i*BATCH_ROW_COUNT, (i+1)*BATCH_ROW_COUNT, record_array);
std::vector<int64_t> record_ids;
Status stat = conn->AddVector(TABLE_NAME, record_array, record_ids);
std::cout << "AddVector function call status: " << stat.ToString() << std::endl;
@ -178,10 +184,10 @@ ClientTest::Test(const std::string& address, const std::string& port) {
}
{//search vectors
std::cout << "Waiting data persist. Sleep 10 seconds ..." << std::endl;
sleep(10);
Sleep(2);
std::vector<RowRecord> record_array;
BuildVectors(SEARCH_TARGET, SEARCH_TARGET + 10, record_array);
BuildVectors(SEARCH_TARGET, SEARCH_TARGET + NQ, record_array);
std::vector<Range> query_range_array;
Range rg;

View File

@ -6,9 +6,9 @@
#include <vector>
#include <memory>
/** \brief MegaSearch SDK namespace
/** \brief Milvus SDK namespace
*/
namespace megasearch {
namespace milvus {
/**
@ -119,7 +119,7 @@ public:
* Connect function should be called before any operations
* Server will be connected after Connect return OK
*
* @param uri, use to provide server information, example: megasearch://ipaddress:port
* @param uri, use to provide server information, example: milvus://ipaddress:port
*
* @return Indicate if connect is successful
*/

View File

@ -3,9 +3,9 @@
#include <string>
#include <sstream>
/** \brief MegaSearch SDK namespace
/** \brief Milvus SDK namespace
*/
namespace megasearch {
namespace milvus {
/**
* @brief Status Code for SDK interface return

View File

@ -6,7 +6,7 @@
#include "ClientProxy.h"
#include "util/ConvertUtil.h"
namespace megasearch {
namespace milvus {
std::shared_ptr<ThriftClient>&
ClientProxy::ClientPtr() const {

View File

@ -5,10 +5,10 @@
******************************************************************************/
#pragma once
#include "MegaSearch.h"
#include "MilvusApi.h"
#include "ThriftClient.h"
namespace megasearch {
namespace milvus {
class ClientProxy : public Connection {
public:

View File

@ -5,8 +5,8 @@
******************************************************************************/
#include "ThriftClient.h"
#include "megasearch_types.h"
#include "megasearch_constants.h"
#include "milvus_types.h"
#include "milvus_constants.h"
#include <exception>
@ -21,7 +21,7 @@
#include <thrift/transport/TBufferTransports.h>
#include <thrift/concurrency/PosixThreadFactory.h>
namespace megasearch {
namespace milvus {
using namespace ::apache::thrift;
using namespace ::apache::thrift::protocol;
@ -36,7 +36,7 @@ ThriftClient::~ThriftClient() {
}
MegasearchServiceClientPtr
ServiceClientPtr
ThriftClient::interface() {
if(client_ == nullptr) {
throw std::exception();
@ -62,10 +62,10 @@ ThriftClient::Connect(const std::string& address, int32_t port, const std::strin
}
transport_ptr->open();
client_ = std::make_shared<thrift::MegasearchServiceClient>(protocol_ptr);
client_ = std::make_shared<thrift::MilvusServiceClient>(protocol_ptr);
} catch ( std::exception& ex) {
//CLIENT_LOG_ERROR << "connect encounter exception: " << ex.what();
return Status(StatusCode::NotConnected, "failed to connect megasearch server" + std::string(ex.what()));
return Status(StatusCode::NotConnected, "failed to connect server" + std::string(ex.what()));
}
return Status::OK();

View File

@ -5,14 +5,14 @@
******************************************************************************/
#pragma once
#include "MegasearchService.h"
#include "MilvusService.h"
#include "Status.h"
#include <memory>
namespace megasearch {
namespace milvus {
using MegasearchServiceClientPtr = std::shared_ptr<megasearch::thrift::MegasearchServiceClient>;
using ServiceClientPtr = std::shared_ptr<::milvus::thrift::MilvusServiceClient>;
static const std::string THRIFT_PROTOCOL_JSON = "json";
static const std::string THRIFT_PROTOCOL_BINARY = "binary";
@ -23,13 +23,13 @@ public:
ThriftClient();
virtual ~ThriftClient();
MegasearchServiceClientPtr interface();
ServiceClientPtr interface();
Status Connect(const std::string& address, int32_t port, const std::string& protocol);
Status Disconnect();
private:
MegasearchServiceClientPtr client_;
ServiceClientPtr client_;
};

View File

@ -6,7 +6,7 @@
#include "ConnectionImpl.h"
#include "version.h"
namespace megasearch {
namespace milvus {
std::shared_ptr<Connection>
Connection::Create() {
@ -14,7 +14,7 @@ Connection::Create() {
}
Status
Connection::Destroy(std::shared_ptr<megasearch::Connection> connection_ptr) {
Connection::Destroy(std::shared_ptr<milvus::Connection> connection_ptr) {
if(connection_ptr != nullptr) {
return connection_ptr->Disconnect();
}
@ -48,7 +48,7 @@ ConnectionImpl::Disconnect() {
std::string
ConnectionImpl::ClientVersion() const {
return MEGASEARCH_VERSION;
return MILVUS_VERSION;
}
Status

View File

@ -5,10 +5,10 @@
******************************************************************************/
#pragma once
#include "MegaSearch.h"
#include "MilvusApi.h"
#include "client/ClientProxy.h"
namespace megasearch {
namespace milvus {
class ConnectionImpl : public Connection {
public:

View File

@ -6,7 +6,7 @@
#include "Status.h"
namespace megasearch {
namespace milvus {
Status::~Status() noexcept {
if (state_ != nullptr) {

View File

@ -8,15 +8,15 @@
#include <map>
namespace megasearch {
namespace milvus {
static const std::string INDEX_RAW = "raw";
static const std::string INDEX_IVFFLAT = "ivfflat";
std::string ConvertUtil::IndexType2Str(megasearch::IndexType index) {
static const std::map<megasearch::IndexType, std::string> s_index2str = {
{megasearch::IndexType::cpu_idmap, INDEX_RAW},
{megasearch::IndexType::gpu_ivfflat, INDEX_IVFFLAT}
std::string ConvertUtil::IndexType2Str(IndexType index) {
static const std::map<IndexType, std::string> s_index2str = {
{IndexType::cpu_idmap, INDEX_RAW},
{IndexType::gpu_ivfflat, INDEX_IVFFLAT}
};
const auto& iter = s_index2str.find(index);
@ -27,10 +27,10 @@ std::string ConvertUtil::IndexType2Str(megasearch::IndexType index) {
return iter->second;
}
megasearch::IndexType ConvertUtil::Str2IndexType(const std::string& type) {
static const std::map<std::string, megasearch::IndexType> s_str2index = {
{INDEX_RAW, megasearch::IndexType::cpu_idmap},
{INDEX_IVFFLAT, megasearch::IndexType::gpu_ivfflat}
IndexType ConvertUtil::Str2IndexType(const std::string& type) {
static const std::map<std::string, IndexType> s_str2index = {
{INDEX_RAW, IndexType::cpu_idmap},
{INDEX_IVFFLAT, IndexType::gpu_ivfflat}
};
const auto& iter = s_str2index.find(type);

View File

@ -5,14 +5,14 @@
******************************************************************************/
#pragma once
#include "MegaSearch.h"
#include "MilvusApi.h"
namespace megasearch {
namespace milvus {
class ConvertUtil {
public:
static std::string IndexType2Str(megasearch::IndexType index);
static megasearch::IndexType Str2IndexType(const std::string& type);
static std::string IndexType2Str(IndexType index);
static IndexType Str2IndexType(const std::string& type);
};
}

View File

@ -9,7 +9,7 @@
#include <exception>
namespace megasearch {
namespace milvus {
class Exception : public std::exception {
public:
Exception(StatusCode error_code,

View File

@ -0,0 +1,42 @@
/*******************************************************************************
* Copyright (Zilliz) - All Rights Reserved
* Unauthorized copying of this file, via any medium is strictly prohibited.
* Proprietary and confidential.
******************************************************************************/
#include "DBWrapper.h"
#include "ServerConfig.h"
#include "utils/CommonUtil.h"
#include "utils/Log.h"
namespace zilliz {
namespace milvus {
namespace server {

// Builds the engine options from the CONFIG_DB section of the server
// configuration, makes sure the meta directory exists and opens the database.
// Throws ServerException(SERVER_NULL_POINTER) when DB::Open leaves the handle
// null.
DBWrapper::DBWrapper() {
    zilliz::milvus::engine::Options options;
    ConfigNode& db_config = ServerConfig::GetInstance().GetConfig(CONFIG_DB);

    // Meta backend URI, plus the meta directory located at "<db path>/db".
    options.meta.backend_uri = db_config.GetValue(CONFIG_DB_URL);
    std::string root_path = db_config.GetValue(CONFIG_DB_PATH);
    options.meta.path = root_path + "/db";

    // The configured trigger size is expressed in MB. Only a value larger than
    // zero overrides whatever Options already holds.
    int64_t trigger_size_mb = db_config.GetInt64Value(CONFIG_DB_INDEX_TRIGGER_SIZE);
    if (trigger_size_mb > 0) {
        options.index_trigger_size = static_cast<size_t>(trigger_size_mb) * engine::ONE_MB;
    }

    CommonUtil::CreateDirectory(options.meta.path);

    zilliz::milvus::engine::DB::Open(options, &db_);
    if (nullptr == db_) {
        SERVER_LOG_ERROR << "Failed to open db";
        throw ServerException(SERVER_NULL_POINTER, "Failed to open db");
    }
}

// Frees the database handle stored in db_.
DBWrapper::~DBWrapper() {
    delete db_;
}

}
}
}

View File

@ -0,0 +1,34 @@
/*******************************************************************************
* Copyright (Zilliz) - All Rights Reserved
* Unauthorized copying of this file, via any medium is strictly prohibited.
* Proprietary and confidential.
******************************************************************************/
#pragma once
#include "db/DB.h"
#include "db/Meta.h"
namespace zilliz {
namespace milvus {
namespace server {

/**
 * @brief Owner of the single engine DB handle used by the server.
 *
 * The only instance is the function-local static inside DB(), so it is
 * created the first time DB() is called. Construction and destruction are
 * private; callers only ever see the raw engine pointer.
 */
class DBWrapper {
private:
    DBWrapper();
    ~DBWrapper();

    // db_ is an owning raw pointer that the destructor deletes. A copied
    // wrapper would delete the same handle a second time, so copying is
    // disabled outright.
    DBWrapper(const DBWrapper&) = delete;
    DBWrapper& operator=(const DBWrapper&) = delete;

public:
    /**
     * @brief Access the shared engine database.
     *
     * @return the engine DB pointer held by the singleton wrapper.
     */
    static zilliz::milvus::engine::DB* DB() {
        static DBWrapper db_wrapper;
        return db_wrapper.db();
    }

    /** @return the engine DB pointer held by this wrapper. */
    zilliz::milvus::engine::DB* db() { return db_; }

private:
    // Engine handle; null until the constructor has opened the database.
    zilliz::milvus::engine::DB* db_ = nullptr;
};

}
}
}

View File

@ -1,83 +0,0 @@
/*******************************************************************************
* Copyright (Zilliz) - All Rights Reserved
* Unauthorized copying of this file, via any medium is strictly prohibited.
* Proprietary and confidential.
******************************************************************************/
#include "MegasearchHandler.h"
#include "MegasearchTask.h"
#include "utils/TimeRecorder.h"
namespace zilliz {
namespace milvus {
namespace server {
using namespace megasearch;
MegasearchServiceHandler::MegasearchServiceHandler() {
}
void
MegasearchServiceHandler::CreateTable(const thrift::TableSchema &param) {
BaseTaskPtr task_ptr = CreateTableTask::Create(param);
MegasearchScheduler::ExecTask(task_ptr);
}
void
MegasearchServiceHandler::DeleteTable(const std::string &table_name) {
BaseTaskPtr task_ptr = DeleteTableTask::Create(table_name);
MegasearchScheduler::ExecTask(task_ptr);
}
void
MegasearchServiceHandler::AddVector(std::vector<int64_t> &_return,
const std::string &table_name,
const std::vector<thrift::RowRecord> &record_array) {
BaseTaskPtr task_ptr = AddVectorTask::Create(table_name, record_array, _return);
MegasearchScheduler::ExecTask(task_ptr);
}
void
MegasearchServiceHandler::SearchVector(std::vector<megasearch::thrift::TopKQueryResult> & _return,
const std::string& table_name,
const std::vector<megasearch::thrift::RowRecord> & query_record_array,
const std::vector<megasearch::thrift::Range> & query_range_array,
const int64_t topk) {
BaseTaskPtr task_ptr = SearchVectorTask::Create(table_name, query_record_array, query_range_array, topk, _return);
MegasearchScheduler::ExecTask(task_ptr);
}
void
MegasearchServiceHandler::DescribeTable(thrift::TableSchema &_return, const std::string &table_name) {
BaseTaskPtr task_ptr = DescribeTableTask::Create(table_name, _return);
MegasearchScheduler::ExecTask(task_ptr);
}
int64_t
MegasearchServiceHandler::GetTableRowCount(const std::string& table_name) {
int64_t row_count = 0;
{
BaseTaskPtr task_ptr = GetTableRowCountTask::Create(table_name, row_count);
MegasearchScheduler::ExecTask(task_ptr);
task_ptr->WaitToFinish();
}
return row_count;
}
void
MegasearchServiceHandler::ShowTables(std::vector<std::string> &_return) {
BaseTaskPtr task_ptr = ShowTablesTask::Create(_return);
MegasearchScheduler::ExecTask(task_ptr);
}
void
MegasearchServiceHandler::Ping(std::string& _return, const std::string& cmd) {
BaseTaskPtr task_ptr = PingTask::Create(cmd, _return);
MegasearchScheduler::ExecTask(task_ptr);
}
}
}
}

View File

@ -1,34 +0,0 @@
/*******************************************************************************
* Copyright (Zilliz) - All Rights Reserved
* Unauthorized copying of this file, via any medium is strictly prohibited.
* Proprietary and confidential.
******************************************************************************/
#include "metrics/Metrics.h"
#include "MegasearchThreadPoolServer.h"
namespace zilliz {
namespace milvus {
namespace server {
void
MegasearchThreadPoolServer::onClientConnected(const std::shared_ptr<apache::thrift::server::TConnectedClient> &pClient) {
server::Metrics::GetInstance().ConnectionGaugeIncrement();
TThreadPoolServer::onClientConnected(pClient);
}
void
MegasearchThreadPoolServer::onClientDisconnected(apache::thrift::server::TConnectedClient *pClient) {
server::Metrics::GetInstance().ConnectionGaugeDecrement();
TThreadPoolServer::onClientDisconnected(pClient);
}
zilliz::milvus::server::MegasearchThreadPoolServer::MegasearchThreadPoolServer(const std::shared_ptr<apache::thrift::TProcessor> &processor,
const std::shared_ptr<apache::thrift::transport::TServerTransport> &serverTransport,
const std::shared_ptr<apache::thrift::transport::TTransportFactory> &transportFactory,
const std::shared_ptr<apache::thrift::protocol::TProtocolFactory> &protocolFactory,
const std::shared_ptr<apache::thrift::concurrency::ThreadManager> &threadManager)
: TThreadPoolServer(processor, serverTransport, transportFactory, protocolFactory, threadManager) {
}
}
}
}

View File

@ -3,12 +3,14 @@
* Unauthorized copying of this file, via any medium is strictly prohibited.
* Proprietary and confidential.
******************************************************************************/
#include "MegasearchServer.h"
#include "MegasearchHandler.h"
#include "megasearch_types.h"
#include "megasearch_constants.h"
#include "MilvusServer.h"
#include "RequestHandler.h"
#include "ServerConfig.h"
#include "MegasearchThreadPoolServer.h"
#include "ThreadPoolServer.h"
#include "DBWrapper.h"
#include "milvus_types.h"
#include "milvus_constants.h"
#include <thrift/protocol/TBinaryProtocol.h>
#include <thrift/protocol/TJSONProtocol.h>
@ -26,7 +28,7 @@ namespace zilliz {
namespace milvus {
namespace server {
using namespace megasearch::thrift;
using namespace ::milvus::thrift;
using namespace ::apache::thrift;
using namespace ::apache::thrift::protocol;
using namespace ::apache::thrift::transport;
@ -36,7 +38,7 @@ using namespace ::apache::thrift::concurrency;
static stdcxx::shared_ptr<TServer> s_server;
void
MegasearchServer::StartService() {
MilvusServer::StartService() {
if(s_server != nullptr){
StopService();
}
@ -45,13 +47,15 @@ MegasearchServer::StartService() {
ConfigNode server_config = config.GetConfig(CONFIG_SERVER);
std::string address = server_config.GetValue(CONFIG_SERVER_ADDRESS, "127.0.0.1");
int32_t port = server_config.GetInt32Value(CONFIG_SERVER_PORT, 33001);
int32_t port = server_config.GetInt32Value(CONFIG_SERVER_PORT, 19530);
std::string protocol = server_config.GetValue(CONFIG_SERVER_PROTOCOL, "binary");
std::string mode = server_config.GetValue(CONFIG_SERVER_MODE, "thread_pool");
try {
stdcxx::shared_ptr<MegasearchServiceHandler> handler(new MegasearchServiceHandler());
stdcxx::shared_ptr<TProcessor> processor(new MegasearchServiceProcessor(handler));
DBWrapper::DB();//initialize db
stdcxx::shared_ptr<RequestHandler> handler(new RequestHandler());
stdcxx::shared_ptr<TProcessor> processor(new MilvusServiceProcessor(handler));
stdcxx::shared_ptr<TServerTransport> server_transport(new TServerSocket(address, port));
stdcxx::shared_ptr<TTransportFactory> transport_factory(new TBufferedTransportFactory());
@ -77,7 +81,7 @@ MegasearchServer::StartService() {
threadManager->threadFactory(threadFactory);
threadManager->start();
s_server.reset(new MegasearchThreadPoolServer(processor,
s_server.reset(new ThreadPoolServer(processor,
server_transport,
transport_factory,
protocol_factory,
@ -93,7 +97,7 @@ MegasearchServer::StartService() {
}
void
MegasearchServer::StopService() {
MilvusServer::StopService() {
auto stop_server_worker = [&]{
if(s_server != nullptr) {
s_server->stop();

View File

@ -12,7 +12,7 @@ namespace zilliz {
namespace milvus {
namespace server {
class MegasearchServer {
class MilvusServer {
public:
static void StartService();
static void StopService();

View File

@ -0,0 +1,96 @@
/*******************************************************************************
* Copyright (Zilliz) - All Rights Reserved
* Unauthorized copying of this file, via any medium is strictly prohibited.
* Proprietary and confidential.
******************************************************************************/
#include "RequestHandler.h"
#include "RequestTask.h"
#include "utils/TimeRecorder.h"
namespace zilliz {
namespace milvus {
namespace server {
using namespace ::milvus;
RequestHandler::RequestHandler() {
}
void
RequestHandler::CreateTable(const thrift::TableSchema &param) {
BaseTaskPtr task_ptr = CreateTableTask::Create(param);
RequestScheduler::ExecTask(task_ptr);
}
void
RequestHandler::DeleteTable(const std::string &table_name) {
BaseTaskPtr task_ptr = DeleteTableTask::Create(table_name);
RequestScheduler::ExecTask(task_ptr);
}
void
RequestHandler::AddVector(std::vector<int64_t> &_return,
const std::string &table_name,
const std::vector<thrift::RowRecord> &record_array) {
BaseTaskPtr task_ptr = AddVectorTask::Create(table_name, record_array, _return);
RequestScheduler::ExecTask(task_ptr);
}
void
RequestHandler::SearchVector(std::vector<thrift::TopKQueryResult> &_return,
const std::string &table_name,
const std::vector<thrift::RowRecord> &query_record_array,
const std::vector<thrift::Range> &query_range_array,
const int64_t topk) {
BaseTaskPtr task_ptr = SearchVectorTask::Create(table_name, std::vector<std::string>(), query_record_array,
query_range_array, topk, _return);
RequestScheduler::ExecTask(task_ptr);
}
void
RequestHandler::SearchVectorInFiles(std::vector<::milvus::thrift::TopKQueryResult> &_return,
const std::string& table_name,
const std::vector<std::string> &file_id_array,
const std::vector<::milvus::thrift::RowRecord> &query_record_array,
const std::vector<::milvus::thrift::Range> &query_range_array,
const int64_t topk) {
BaseTaskPtr task_ptr = SearchVectorTask::Create(table_name, file_id_array, query_record_array,
query_range_array, topk, _return);
RequestScheduler::ExecTask(task_ptr);
}
void
RequestHandler::DescribeTable(thrift::TableSchema &_return, const std::string &table_name) {
BaseTaskPtr task_ptr = DescribeTableTask::Create(table_name, _return);
RequestScheduler::ExecTask(task_ptr);
}
int64_t
RequestHandler::GetTableRowCount(const std::string& table_name) {
int64_t row_count = 0;
{
BaseTaskPtr task_ptr = GetTableRowCountTask::Create(table_name, row_count);
RequestScheduler::ExecTask(task_ptr);
task_ptr->WaitToFinish();
}
return row_count;
}
void
RequestHandler::ShowTables(std::vector<std::string> &_return) {
BaseTaskPtr task_ptr = ShowTablesTask::Create(_return);
RequestScheduler::ExecTask(task_ptr);
}
void
RequestHandler::Ping(std::string& _return, const std::string& cmd) {
BaseTaskPtr task_ptr = PingTask::Create(cmd, _return);
RequestScheduler::ExecTask(task_ptr);
}
}
}
}

View File

@ -8,15 +8,15 @@
#include <cstdint>
#include <string>
#include "MegasearchService.h"
#include "MilvusService.h"
namespace zilliz {
namespace milvus {
namespace server {
class MegasearchServiceHandler : virtual public megasearch::thrift::MegasearchServiceIf {
class RequestHandler : virtual public ::milvus::thrift::MilvusServiceIf {
public:
MegasearchServiceHandler();
RequestHandler();
/**
* @brief Create table method
@ -28,7 +28,7 @@ public:
*
* @param param
*/
void CreateTable(const megasearch::thrift::TableSchema& param);
void CreateTable(const ::milvus::thrift::TableSchema& param);
/**
* @brief Delete table method
@ -57,7 +57,7 @@ public:
*/
void AddVector(std::vector<int64_t> & _return,
const std::string& table_name,
const std::vector<megasearch::thrift::RowRecord> & record_array);
const std::vector<::milvus::thrift::RowRecord> & record_array);
/**
* @brief Query vector
@ -76,10 +76,34 @@ public:
* @param query_range_array
* @param topk
*/
void SearchVector(std::vector<megasearch::thrift::TopKQueryResult> & _return,
void SearchVector(std::vector<::milvus::thrift::TopKQueryResult> & _return,
const std::string& table_name,
const std::vector<megasearch::thrift::RowRecord> & query_record_array,
const std::vector<megasearch::thrift::Range> & query_range_array,
const std::vector<::milvus::thrift::RowRecord> & query_record_array,
const std::vector<::milvus::thrift::Range> & query_range_array,
const int64_t topk);
/**
* @brief Internal use query interface
*
* This method is used to query vector in specified files.
*
* @param file_id_array, specified files id array, queried.
* @param query_record_array, all vector are going to be queried.
* @param query_range_array, optional ranges for conditional search. If not specified, search whole table
* @param topk, how many similarity vectors will be searched.
*
* @return query result array.
*
* @param file_id_array
* @param query_record_array
* @param query_range_array
* @param topk
*/
virtual void SearchVectorInFiles(std::vector<::milvus::thrift::TopKQueryResult> & _return,
const std::string& table_name,
const std::vector<std::string> & file_id_array,
const std::vector<::milvus::thrift::RowRecord> & query_record_array,
const std::vector<::milvus::thrift::Range> & query_range_array,
const int64_t topk);
/**
@ -93,7 +117,7 @@ public:
*
* @param table_name
*/
void DescribeTable(megasearch::thrift::TableSchema& _return, const std::string& table_name);
void DescribeTable(::milvus::thrift::TableSchema& _return, const std::string& table_name);
/**
* @brief Get table row count

View File

@ -3,17 +3,17 @@
* Unauthorized copying of this file, via any medium is strictly prohibited.
* Proprietary and confidential.
******************************************************************************/
#include "MegasearchScheduler.h"
#include "RequestScheduler.h"
#include "utils/Log.h"
#include "megasearch_types.h"
#include "megasearch_constants.h"
#include "milvus_types.h"
#include "milvus_constants.h"
namespace zilliz {
namespace milvus {
namespace server {
using namespace megasearch;
using namespace ::milvus;
namespace {
const std::map<ServerError, thrift::ErrorCode::type> &ErrorMap() {
@ -77,21 +77,21 @@ ServerError BaseTask::WaitToFinish() {
}
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
MegasearchScheduler::MegasearchScheduler()
RequestScheduler::RequestScheduler()
: stopped_(false) {
Start();
}
MegasearchScheduler::~MegasearchScheduler() {
RequestScheduler::~RequestScheduler() {
Stop();
}
void MegasearchScheduler::ExecTask(BaseTaskPtr& task_ptr) {
void RequestScheduler::ExecTask(BaseTaskPtr& task_ptr) {
if(task_ptr == nullptr) {
return;
}
MegasearchScheduler& scheduler = MegasearchScheduler::GetInstance();
RequestScheduler& scheduler = RequestScheduler::GetInstance();
scheduler.ExecuteTask(task_ptr);
if(!task_ptr->IsAsync()) {
@ -110,7 +110,7 @@ void MegasearchScheduler::ExecTask(BaseTaskPtr& task_ptr) {
}
}
void MegasearchScheduler::Start() {
void RequestScheduler::Start() {
if(!stopped_) {
return;
}
@ -118,7 +118,7 @@ void MegasearchScheduler::Start() {
stopped_ = false;
}
void MegasearchScheduler::Stop() {
void RequestScheduler::Stop() {
if(stopped_) {
return;
}
@ -143,7 +143,7 @@ void MegasearchScheduler::Stop() {
SERVER_LOG_INFO << "Scheduler stopped";
}
ServerError MegasearchScheduler::ExecuteTask(const BaseTaskPtr& task_ptr) {
ServerError RequestScheduler::ExecuteTask(const BaseTaskPtr& task_ptr) {
if(task_ptr == nullptr) {
return SERVER_NULL_POINTER;
}
@ -184,7 +184,7 @@ namespace {
}
}
ServerError MegasearchScheduler::PutTaskToQueue(const BaseTaskPtr& task_ptr) {
ServerError RequestScheduler::PutTaskToQueue(const BaseTaskPtr& task_ptr) {
std::lock_guard<std::mutex> lock(queue_mtx_);
std::string group_name = task_ptr->TaskGroup();

View File

@ -50,10 +50,10 @@ using TaskQueue = BlockingQueue<BaseTaskPtr>;
using TaskQueuePtr = std::shared_ptr<TaskQueue>;
using ThreadPtr = std::shared_ptr<std::thread>;
class MegasearchScheduler {
class RequestScheduler {
public:
static MegasearchScheduler& GetInstance() {
static MegasearchScheduler scheduler;
static RequestScheduler& GetInstance() {
static RequestScheduler scheduler;
return scheduler;
}
@ -65,8 +65,8 @@ public:
static void ExecTask(BaseTaskPtr& task_ptr);
protected:
MegasearchScheduler();
virtual ~MegasearchScheduler();
RequestScheduler();
virtual ~RequestScheduler();
ServerError PutTaskToQueue(const BaseTaskPtr& task_ptr);

View File

@ -3,20 +3,20 @@
* Unauthorized copying of this file, via any medium is strictly prohibited.
* Proprietary and confidential.
******************************************************************************/
#include "MegasearchTask.h"
#include "RequestTask.h"
#include "ServerConfig.h"
#include "utils/CommonUtil.h"
#include "utils/Log.h"
#include "utils/TimeRecorder.h"
#include "db/DB.h"
#include "db/Env.h"
#include "db/Meta.h"
#include "DBWrapper.h"
#include "version.h"
namespace zilliz {
namespace milvus {
namespace server {
using namespace ::milvus;
static const std::string DQL_TASK_GROUP = "dql";
static const std::string DDL_DML_TASK_GROUP = "ddl_dml";
static const std::string PING_TASK_GROUP = "ping";
@ -25,40 +25,6 @@ using DB_META = zilliz::milvus::engine::meta::Meta;
using DB_DATE = zilliz::milvus::engine::meta::DateT;
namespace {
class DBWrapper {
public:
DBWrapper() {
zilliz::milvus::engine::Options opt;
ConfigNode& config = ServerConfig::GetInstance().GetConfig(CONFIG_DB);
opt.meta.backend_uri = config.GetValue(CONFIG_DB_URL);
std::string db_path = config.GetValue(CONFIG_DB_PATH);
opt.memory_sync_interval = (uint16_t)config.GetInt32Value(CONFIG_DB_FLUSH_INTERVAL, 10);
opt.meta.path = db_path + "/db";
CommonUtil::CreateDirectory(opt.meta.path);
zilliz::milvus::engine::DB::Open(opt, &db_);
if(db_ == nullptr) {
SERVER_LOG_ERROR << "Failed to open db";
throw ServerException(SERVER_NULL_POINTER, "Failed to open db");
}
}
~DBWrapper() {
delete db_;
}
zilliz::milvus::engine::DB* DB() { return db_; }
private:
zilliz::milvus::engine::DB* db_ = nullptr;
};
zilliz::milvus::engine::DB* DB() {
static DBWrapper db_wrapper;
return db_wrapper.DB();
}
engine::EngineType EngineType(int type) {
static std::map<int, engine::EngineType> map_type = {
{0, engine::EngineType::INVALID},
@ -122,7 +88,7 @@ namespace {
static constexpr long DAY_SECONDS = 86400;
ServerError
ConvertTimeRangeToDBDates(const std::vector<megasearch::thrift::Range> &range_array,
ConvertTimeRangeToDBDates(const std::vector<thrift::Range> &range_array,
std::vector<DB_DATE>& dates) {
dates.clear();
ServerError error_code;
@ -171,19 +137,31 @@ ServerError CreateTableTask::OnExecute() {
TimeRecorder rc("CreateTableTask");
try {
if(schema_.table_name.empty() || schema_.dimension == 0 || schema_.index_type == 0) {
return SERVER_INVALID_ARGUMENT;
//step 1: check arguments
if(schema_.table_name.empty() || schema_.dimension <= 0) {
error_code_ = SERVER_INVALID_ARGUMENT;
error_msg_ = "Invalid table name or dimension";
SERVER_LOG_ERROR << error_msg_;
return error_code_;
}
//step 1: construct table schema
engine::EngineType engine_type = EngineType(schema_.index_type);
if(engine_type == engine::EngineType::INVALID) {
error_code_ = SERVER_INVALID_ARGUMENT;
error_msg_ = "Invalid index type";
SERVER_LOG_ERROR << error_msg_;
return error_code_;
}
//step 2: construct table schema
engine::meta::TableSchema table_info;
table_info.dimension_ = (uint16_t)schema_.dimension;
table_info.table_id_ = schema_.table_name;
table_info.engine_type_ = (int)EngineType(schema_.index_type);
table_info.store_raw_data_ = schema_.store_raw_vector;
//step 2: create table
engine::Status stat = DB()->CreateTable(table_info);
//step 3: create table
engine::Status stat = DBWrapper::DB()->CreateTable(table_info);
if(!stat.ok()) {//table could exist
error_code_ = SERVER_UNEXPECTED_ERROR;
error_msg_ = "Engine failed: " + stat.ToString();
@ -205,7 +183,7 @@ ServerError CreateTableTask::OnExecute() {
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
DescribeTableTask::DescribeTableTask(const std::string &table_name, thrift::TableSchema &schema)
: BaseTask(PING_TASK_GROUP),
: BaseTask(DDL_DML_TASK_GROUP),
table_name_(table_name),
schema_(schema) {
schema_.table_name = table_name_;
@ -219,9 +197,18 @@ ServerError DescribeTableTask::OnExecute() {
TimeRecorder rc("DescribeTableTask");
try {
//step 1: check arguments
if(table_name_.empty()) {
error_code_ = SERVER_INVALID_ARGUMENT;
error_msg_ = "Table name cannot be empty";
SERVER_LOG_ERROR << error_msg_;
return error_code_;
}
//step 2: get table info
engine::meta::TableSchema table_info;
table_info.table_id_ = table_name_;
engine::Status stat = DB()->DescribeTable(table_info);
engine::Status stat = DBWrapper::DB()->DescribeTable(table_info);
if(!stat.ok()) {
error_code_ = SERVER_TABLE_NOT_EXIST;
error_msg_ = "Engine failed: " + stat.ToString();
@ -261,7 +248,7 @@ ServerError DeleteTableTask::OnExecute() {
try {
TimeRecorder rc("DeleteTableTask");
//step 1: check validation
//step 1: check arguments
if (table_name_.empty()) {
error_code_ = SERVER_INVALID_ARGUMENT;
error_msg_ = "Table name cannot be empty";
@ -272,7 +259,7 @@ ServerError DeleteTableTask::OnExecute() {
//step 2: check table existence
engine::meta::TableSchema table_info;
table_info.table_id_ = table_name_;
engine::Status stat = DB()->DescribeTable(table_info);
engine::Status stat = DBWrapper::DB()->DescribeTable(table_info);
if(!stat.ok()) {
error_code_ = SERVER_TABLE_NOT_EXIST;
error_msg_ = "Engine failed: " + stat.ToString();
@ -284,7 +271,7 @@ ServerError DeleteTableTask::OnExecute() {
//step 3: delete table
std::vector<DB_DATE> dates;
stat = DB()->DeleteTable(table_name_, dates);
stat = DBWrapper::DB()->DeleteTable(table_name_, dates);
if(!stat.ok()) {
SERVER_LOG_ERROR << "Engine failed: " << stat.ToString();
return SERVER_UNEXPECTED_ERROR;
@ -304,7 +291,7 @@ ServerError DeleteTableTask::OnExecute() {
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
ShowTablesTask::ShowTablesTask(std::vector<std::string>& tables)
: BaseTask(DQL_TASK_GROUP),
: BaseTask(DDL_DML_TASK_GROUP),
tables_(tables) {
}
@ -315,7 +302,7 @@ BaseTaskPtr ShowTablesTask::Create(std::vector<std::string>& tables) {
ServerError ShowTablesTask::OnExecute() {
std::vector<engine::meta::TableSchema> schema_array;
engine::Status stat = DB()->AllTables(schema_array);
engine::Status stat = DBWrapper::DB()->AllTables(schema_array);
if(!stat.ok()) {
error_code_ = SERVER_UNEXPECTED_ERROR;
error_msg_ = "Engine failed: " + stat.ToString();
@ -352,14 +339,25 @@ ServerError AddVectorTask::OnExecute() {
try {
TimeRecorder rc("AddVectorTask");
if(record_array_.empty()) {
return SERVER_SUCCESS;
//step 1: check arguments
if (table_name_.empty()) {
error_code_ = SERVER_INVALID_ARGUMENT;
error_msg_ = "Table name cannot be empty";
SERVER_LOG_ERROR << error_msg_;
return error_code_;
}
//step 1: check table existence
if(record_array_.empty()) {
error_code_ = SERVER_INVALID_ARGUMENT;
error_msg_ = "Row record array is empty";
SERVER_LOG_ERROR << error_msg_;
return error_code_;
}
//step 2: check table existence
engine::meta::TableSchema table_info;
table_info.table_id_ = table_name_;
engine::Status stat = DB()->DescribeTable(table_info);
engine::Status stat = DBWrapper::DB()->DescribeTable(table_info);
if(!stat.ok()) {
error_code_ = SERVER_TABLE_NOT_EXIST;
error_msg_ = "Engine failed: " + stat.ToString();
@ -369,7 +367,7 @@ ServerError AddVectorTask::OnExecute() {
rc.Record("check validation");
//step 2: prepare float data
//step 3: prepare float data
std::vector<float> vec_f;
error_code_ = ConvertRowRecordToFloatArray(record_array_, table_info.dimension_, vec_f);
if(error_code_ != SERVER_SUCCESS) {
@ -379,9 +377,9 @@ ServerError AddVectorTask::OnExecute() {
rc.Record("prepare vectors data");
//step 3: insert vectors
//step 4: insert vectors
uint64_t vec_count = (uint64_t)record_array_.size();
stat = DB()->InsertVectors(table_name_, vec_count, vec_f.data(), record_ids_);
stat = DBWrapper::DB()->InsertVectors(table_name_, vec_count, vec_f.data(), record_ids_);
rc.Record("add vectors to engine");
if(!stat.ok()) {
error_code_ = SERVER_UNEXPECTED_ERROR;
@ -409,13 +407,15 @@ ServerError AddVectorTask::OnExecute() {
}
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
SearchVectorTask::SearchVectorTask(const std::string& table_name,
const std::vector<thrift::RowRecord> & query_record_array,
const std::vector<megasearch::thrift::Range> & query_range_array,
SearchVectorTask::SearchVectorTask(const std::string &table_name,
const std::vector<std::string>& file_id_array,
const std::vector<thrift::RowRecord> &query_record_array,
const std::vector<thrift::Range> &query_range_array,
const int64_t top_k,
std::vector<thrift::TopKQueryResult>& result_array)
std::vector<thrift::TopKQueryResult> &result_array)
: BaseTask(DQL_TASK_GROUP),
table_name_(table_name),
file_id_array_(file_id_array),
record_array_(query_record_array),
range_array_(query_range_array),
top_k_(top_k),
@ -424,11 +424,12 @@ SearchVectorTask::SearchVectorTask(const std::string& table_name,
}
BaseTaskPtr SearchVectorTask::Create(const std::string& table_name,
const std::vector<std::string>& file_id_array,
const std::vector<thrift::RowRecord> & query_record_array,
const std::vector<megasearch::thrift::Range> & query_range_array,
const std::vector<thrift::Range> & query_range_array,
const int64_t top_k,
std::vector<thrift::TopKQueryResult>& result_array) {
return std::shared_ptr<BaseTask>(new SearchVectorTask(table_name,
return std::shared_ptr<BaseTask>(new SearchVectorTask(table_name, file_id_array,
query_record_array, query_range_array, top_k, result_array));
}
@ -436,7 +437,14 @@ ServerError SearchVectorTask::OnExecute() {
try {
TimeRecorder rc("SearchVectorTask");
//step 1: check validation
//step 1: check arguments
if (table_name_.empty()) {
error_code_ = SERVER_INVALID_ARGUMENT;
error_msg_ = "Table name cannot be empty";
SERVER_LOG_ERROR << error_msg_;
return error_code_;
}
if(top_k_ <= 0 || record_array_.empty()) {
error_code_ = SERVER_INVALID_ARGUMENT;
error_msg_ = "Invalid topk value, or query record array is empty";
@ -447,7 +455,7 @@ ServerError SearchVectorTask::OnExecute() {
//step 2: check table existence
engine::meta::TableSchema table_info;
table_info.table_id_ = table_name_;
engine::Status stat = DB()->DescribeTable(table_info);
engine::Status stat = DBWrapper::DB()->DescribeTable(table_info);
if(!stat.ok()) {
error_code_ = SERVER_TABLE_NOT_EXIST;
error_msg_ = "Engine failed: " + stat.ToString();
@ -478,7 +486,13 @@ ServerError SearchVectorTask::OnExecute() {
//step 4: search vectors
engine::QueryResults results;
uint64_t record_count = (uint64_t)record_array_.size();
stat = DB()->Query(table_name_, (size_t)top_k_, record_count, vec_f.data(), dates, results);
if(file_id_array_.empty()) {
stat = DBWrapper::DB()->Query(table_name_, (size_t) top_k_, record_count, vec_f.data(), dates, results);
} else {
stat = DBWrapper::DB()->Query(table_name_, file_id_array_, (size_t) top_k_, record_count, vec_f.data(), dates, results);
}
rc.Record("search vectors from engine");
if(!stat.ok()) {
SERVER_LOG_ERROR << "Engine failed: " << stat.ToString();
@ -510,6 +524,7 @@ ServerError SearchVectorTask::OnExecute() {
}
rc.Record("construct result");
rc.Elapse("totally cost");
} catch (std::exception& ex) {
error_code_ = SERVER_UNEXPECTED_ERROR;
error_msg_ = ex.what();
@ -522,7 +537,7 @@ ServerError SearchVectorTask::OnExecute() {
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
GetTableRowCountTask::GetTableRowCountTask(const std::string& table_name, int64_t& row_count)
: BaseTask(DQL_TASK_GROUP),
: BaseTask(DDL_DML_TASK_GROUP),
table_name_(table_name),
row_count_(row_count) {
@ -536,7 +551,7 @@ ServerError GetTableRowCountTask::OnExecute() {
try {
TimeRecorder rc("GetTableRowCountTask");
//step 1: check validation
//step 1: check arguments
if (table_name_.empty()) {
error_code_ = SERVER_INVALID_ARGUMENT;
error_msg_ = "Table name cannot be empty";
@ -546,7 +561,7 @@ ServerError GetTableRowCountTask::OnExecute() {
//step 2: get row count
uint64_t row_count = 0;
engine::Status stat = DB()->GetTableRowCount(table_name_, row_count);
engine::Status stat = DBWrapper::DB()->GetTableRowCount(table_name_, row_count);
if (!stat.ok()) {
error_code_ = SERVER_UNEXPECTED_ERROR;
error_msg_ = "Engine failed: " + stat.ToString();
@ -582,7 +597,7 @@ BaseTaskPtr PingTask::Create(const std::string& cmd, std::string& result) {
ServerError PingTask::OnExecute() {
if(cmd_ == "version") {
result_ = MEGASEARCH_VERSION;
result_ = MILVUS_VERSION;
}
return SERVER_SUCCESS;

View File

@ -5,12 +5,12 @@
******************************************************************************/
#pragma once
#include "MegasearchScheduler.h"
#include "RequestScheduler.h"
#include "utils/Error.h"
#include "utils/AttributeSerializer.h"
#include "db/Types.h"
#include "megasearch_types.h"
#include "milvus_types.h"
#include <condition_variable>
#include <memory>
@ -19,35 +19,34 @@ namespace zilliz {
namespace milvus {
namespace server {
using namespace megasearch;
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
class CreateTableTask : public BaseTask {
public:
static BaseTaskPtr Create(const thrift::TableSchema& schema);
static BaseTaskPtr Create(const ::milvus::thrift::TableSchema& schema);
protected:
CreateTableTask(const thrift::TableSchema& schema);
CreateTableTask(const ::milvus::thrift::TableSchema& schema);
ServerError OnExecute() override;
private:
const thrift::TableSchema& schema_;
const ::milvus::thrift::TableSchema& schema_;
};
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
class DescribeTableTask : public BaseTask {
public:
static BaseTaskPtr Create(const std::string& table_name, thrift::TableSchema& schema);
static BaseTaskPtr Create(const std::string& table_name, ::milvus::thrift::TableSchema& schema);
protected:
DescribeTableTask(const std::string& table_name, thrift::TableSchema& schema);
DescribeTableTask(const std::string& table_name, ::milvus::thrift::TableSchema& schema);
ServerError OnExecute() override;
private:
std::string table_name_;
thrift::TableSchema& schema_;
::milvus::thrift::TableSchema& schema_;
};
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@ -83,19 +82,19 @@ private:
class AddVectorTask : public BaseTask {
public:
static BaseTaskPtr Create(const std::string& table_name,
const std::vector<thrift::RowRecord>& record_array,
const std::vector<::milvus::thrift::RowRecord>& record_array,
std::vector<int64_t>& record_ids_);
protected:
AddVectorTask(const std::string& table_name,
const std::vector<thrift::RowRecord>& record_array,
const std::vector<::milvus::thrift::RowRecord>& record_array,
std::vector<int64_t>& record_ids_);
ServerError OnExecute() override;
private:
std::string table_name_;
const std::vector<thrift::RowRecord>& record_array_;
const std::vector<::milvus::thrift::RowRecord>& record_array_;
std::vector<int64_t>& record_ids_;
};
@ -103,26 +102,29 @@ private:
class SearchVectorTask : public BaseTask {
public:
static BaseTaskPtr Create(const std::string& table_name,
const std::vector<thrift::RowRecord> & query_record_array,
const std::vector<megasearch::thrift::Range> & query_range_array,
const std::vector<std::string>& file_id_array,
const std::vector<::milvus::thrift::RowRecord> & query_record_array,
const std::vector<::milvus::thrift::Range> & query_range_array,
const int64_t top_k,
std::vector<thrift::TopKQueryResult>& result_array);
std::vector<::milvus::thrift::TopKQueryResult>& result_array);
protected:
SearchVectorTask(const std::string& table_name,
const std::vector<thrift::RowRecord> & query_record_array,
const std::vector<megasearch::thrift::Range> & query_range_array,
const std::vector<std::string>& file_id_array,
const std::vector<::milvus::thrift::RowRecord> & query_record_array,
const std::vector<::milvus::thrift::Range> & query_range_array,
const int64_t top_k,
std::vector<thrift::TopKQueryResult>& result_array);
std::vector<::milvus::thrift::TopKQueryResult>& result_array);
ServerError OnExecute() override;
private:
std::string table_name_;
std::vector<std::string> file_id_array_;
int64_t top_k_;
const std::vector<thrift::RowRecord>& record_array_;
const std::vector<megasearch::thrift::Range>& range_array_;
std::vector<thrift::TopKQueryResult>& result_array_;
const std::vector<::milvus::thrift::RowRecord>& record_array_;
const std::vector<::milvus::thrift::Range>& range_array_;
std::vector<::milvus::thrift::TopKQueryResult>& result_array_;
};
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

View File

@ -5,7 +5,7 @@
////////////////////////////////////////////////////////////////////////////////
#include "Server.h"
#include "ServerConfig.h"
#include "MegasearchServer.h"
#include "MilvusServer.h"
#include "utils/Log.h"
#include "utils/SignalUtil.h"
#include "utils/TimeRecorder.h"
@ -171,7 +171,7 @@ Server::Start() {
signal(SIGTERM, SignalUtil::HandleSignal);
server::Metrics::GetInstance().Init();
server::SystemInfo::GetInstance().Init();
printf("Milvus server start successfully.\n");
std::cout << "Milvus server start successfully." << std::endl;
StartService();
} catch(std::exception& ex){
@ -187,18 +187,18 @@ Server::Start() {
void
Server::Stop() {
printf("Milvus server is going to shutdown ...\n");
std::cout << "Milvus server is going to shutdown ..." << std::endl;
// Unlock and close lockfile
if (pid_fd != -1) {
int ret = lockf(pid_fd, F_ULOCK, 0);
if(ret != 0){
printf("Can't lock file: %s\n", strerror(errno));
std::cout << "Can't lock file: " << strerror(errno) << std::endl;
exit(0);
}
ret = close(pid_fd);
if(ret != 0){
printf("Can't close file: %s\n", strerror(errno));
std::cout << "Can't close file: " << strerror(errno) << std::endl;
exit(0);
}
}
@ -207,7 +207,7 @@ Server::Stop() {
if (!pid_filename_.empty()) {
int ret = unlink(pid_filename_.c_str());
if(ret != 0){
printf("Can't unlink file: %s\n", strerror(errno));
std::cout << "Can't unlink file: " << strerror(errno) << std::endl;
exit(0);
}
}
@ -219,7 +219,7 @@ Server::Stop() {
#ifdef ENABLE_LICENSE
server::LicenseCheck::GetInstance().StopCountingDown();
#endif
printf("Milvus server is closed!\n");
std::cout << "Milvus server is closed!" << std::endl;
}
@ -232,12 +232,12 @@ Server::LoadConfig() {
void
Server::StartService() {
MegasearchServer::StartService();
MilvusServer::StartService();
}
void
Server::StopService() {
MegasearchServer::StopService();
MilvusServer::StopService();
}
}

View File

@ -23,8 +23,7 @@ static const std::string CONFIG_SERVER_MODE = "server_mode";
static const std::string CONFIG_DB = "db_config";
static const std::string CONFIG_DB_URL = "db_backend_url";
static const std::string CONFIG_DB_PATH = "db_path";
static const std::string CONFIG_DB_FLUSH_INTERVAL = "db_flush_interval";
static const std::string CONFIG_DB_IDMAPPER_MAX_FILE = "idmapper_max_open_file";
static const std::string CONFIG_DB_INDEX_TRIGGER_SIZE = "index_building_threshold";
static const std::string CONFIG_LOG = "log_config";

View File

@ -0,0 +1,36 @@
/*******************************************************************************
* Copyright (Zilliz) - All Rights Reserved
* Unauthorized copying of this file, via any medium is strictly prohibited.
* Proprietary and confidential.
******************************************************************************/
#include "metrics/Metrics.h"
#include "ThreadPoolServer.h"
namespace zilliz {
namespace milvus {
namespace server {
// Thrift connection hook: count the new client in the connection gauge,
// then hand over to the stock TThreadPoolServer handling.
void
ThreadPoolServer::onClientConnected(const std::shared_ptr<apache::thrift::server::TConnectedClient> &pClient) {
    // Update the metric first, exactly as before, so the gauge already
    // includes this client while the base class processes it.
    server::Metrics::GetInstance().ConnectionGaugeIncrement();

    apache::thrift::server::TThreadPoolServer::onClientConnected(pClient);
}
// Thrift disconnection hook: remove the client from the connection gauge,
// then let TThreadPoolServer run its own disconnect handling.
void
ThreadPoolServer::onClientDisconnected(apache::thrift::server::TConnectedClient *pClient) {
    server::Metrics::GetInstance().ConnectionGaugeDecrement();

    apache::thrift::server::TThreadPoolServer::onClientDisconnected(pClient);
}
zilliz::milvus::server::ThreadPoolServer::ThreadPoolServer(const std::shared_ptr<apache::thrift::TProcessor> &processor,
const std::shared_ptr<apache::thrift::transport::TServerTransport> &serverTransport,
const std::shared_ptr<apache::thrift::transport::TTransportFactory> &transportFactory,
const std::shared_ptr<apache::thrift::protocol::TProtocolFactory> &protocolFactory,
const std::shared_ptr<apache::thrift::concurrency::ThreadManager> &threadManager)
: TThreadPoolServer(processor, serverTransport, transportFactory, protocolFactory, threadManager) {
}
}
}
}

View File

@ -13,9 +13,9 @@ namespace zilliz {
namespace milvus {
namespace server {
class MegasearchThreadPoolServer : public apache::thrift::server::TThreadPoolServer {
class ThreadPoolServer : public apache::thrift::server::TThreadPoolServer {
public:
MegasearchThreadPoolServer(
ThreadPoolServer(
const std::shared_ptr<apache::thrift::TProcessor>& processor,
const std::shared_ptr<apache::thrift::transport::TServerTransport>& serverTransport,
const std::shared_ptr<apache::thrift::transport::TTransportFactory>& transportFactory,

View File

@ -0,0 +1,32 @@
#pragma once
#include "db/Status.h"
#include <string>
namespace zilliz {
namespace milvus {
namespace engine {
namespace storage {
// Abstract interface for an object-storage backend (bucket + object operations).
// Every operation reports its outcome through a Status value.
class IStorage {
 public:
    // Virtual destructor so that implementations can be destroyed safely
    // through an IStorage pointer.
    virtual ~IStorage() = default;

    // Set up the backend for the given endpoint and credentials.
    virtual Status Create(const std::string &ip_address,
                          const std::string &port,
                          const std::string &access_key,
                          const std::string &secret_key) = 0;

    // Release whatever Create() acquired.
    virtual Status Close() = 0;

    // Create / delete the bucket named bucket_name.
    virtual Status CreateBucket(std::string& bucket_name) = 0;
    virtual Status DeleteBucket(std::string& bucket_name) = 0;

    // Transfer a single object between the bucket (object_key) and a local
    // file path (path_key).
    virtual Status UploadFile(std::string &bucket_name, std::string &object_key, std::string &path_key) = 0;
    virtual Status DownloadFile(std::string &bucket_name, std::string &object_key, std::string &path_key) = 0;

    // Remove the object object_key from bucket_name.
    virtual Status DeleteFile(std::string &bucket_name, std::string &object_key) = 0;
};
}
}
}
}

View File

@ -0,0 +1,159 @@
#include "S3ClientWrapper.h"
#include <aws/s3/model/CreateBucketRequest.h>
#include <aws/s3/model/DeleteBucketRequest.h>
#include <aws/s3/model/PutObjectRequest.h>
#include <aws/s3/model/GetObjectRequest.h>
#include <aws/s3/model/DeleteObjectRequest.h>
#include <iostream>
#include <fstream>
namespace zilliz {
namespace milvus {
namespace engine {
namespace storage {
// Initialise the AWS SDK and build an S3 client bound to "<ip_address>:<port>".
//
// The client uses plain HTTP with SSL verification disabled and signs every
// payload. Constructing the client only configures it, so a successful return
// does not prove the endpoint is reachable.
//
// Calling Create() on an already-created wrapper first closes the previous
// client, so the old S3Client is not leaked and Aws::InitAPI is not stacked.
Status
S3ClientWrapper::Create(const std::string &ip_address,
                        const std::string &port,
                        const std::string &access_key,
                        const std::string &secret_key) {
    if (client_ != nullptr) {
        (void) Close();
    }

    Aws::InitAPI(options_);

    Aws::Client::ClientConfiguration cfg;
    // TODO: ip_address need to be validated.
    cfg.endpointOverride = ip_address + ":" + port;  // S3 server ip address and port
    cfg.scheme = Aws::Http::Scheme::HTTP;
    cfg.verifySSL = false;

    // Credentials are taken from the caller only; never hard-code keys here.
    // Plain `new` reports failure by throwing, so no null check is needed.
    client_ = new Aws::S3::S3Client(Aws::Auth::AWSCredentials(access_key, secret_key),
                                    cfg,
                                    Aws::Client::AWSAuthV4Signer::PayloadSigningPolicy::Always,
                                    false);

    return Status::OK();
}
// Destroy the S3 client and shut the AWS SDK down.
//
// Idempotent: when no client exists (Create() was never called, or Close()
// already ran) this is a no-op, so Aws::ShutdownAPI stays paired with the
// Aws::InitAPI call made in Create().
Status
S3ClientWrapper::Close() {
    if (client_ == nullptr) {
        return Status::OK();
    }

    // The client must be destroyed before the SDK is shut down.
    delete client_;
    client_ = nullptr;
    Aws::ShutdownAPI(options_);

    return Status::OK();
}
// Create the bucket named bucket_name.
//
// Returns Status::OK() on success, Status::AlreadyExist() when the service
// reports that the bucket already exists (owned by anyone, including the
// caller), and Status::Error() for every other failure or when the client has
// not been created yet.
Status
S3ClientWrapper::CreateBucket(std::string& bucket_name) {
    if (client_ == nullptr) {
        std::string error = "S3 client is not created, call Create() first.";
        return Status::Error(error);
    }

    Aws::S3::Model::CreateBucketRequest request;
    request.SetBucket(bucket_name);

    auto outcome = client_->CreateBucket(request);
    if (outcome.IsSuccess()) {
        return Status::OK();
    }

    std::cout << "CreateBucket error: "
              << outcome.GetError().GetExceptionName() << std::endl
              << outcome.GetError().GetMessage() << std::endl;

    switch (outcome.GetError().GetErrorType()) {
        case Aws::S3::S3Errors::BUCKET_ALREADY_EXISTS:
        case Aws::S3::S3Errors::BUCKET_ALREADY_OWNED_BY_YOU:
            return Status::AlreadyExist(outcome.GetError().GetMessage());
        default:
            return Status::Error(outcome.GetError().GetMessage());
    }
}
// Delete the bucket named bucket_name.
//
// Returns Status::OK() on success and Status::Error() with the service
// message on failure, or when the client has not been created yet.
Status
S3ClientWrapper::DeleteBucket(std::string& bucket_name) {
    if (client_ == nullptr) {
        std::string error = "S3 client is not created, call Create() first.";
        return Status::Error(error);
    }

    Aws::S3::Model::DeleteBucketRequest bucket_request;
    bucket_request.SetBucket(bucket_name);

    auto outcome = client_->DeleteBucket(bucket_request);
    if (outcome.IsSuccess()) {
        return Status::OK();
    }

    std::cout << "DeleteBucket error: "
              << outcome.GetError().GetExceptionName() << " - "
              << outcome.GetError().GetMessage() << std::endl;
    return Status::Error(outcome.GetError().GetMessage());
}
// Upload the local file at pathkey to bucket BucketName under key objectKey.
//
// Returns Status::Error() when the client has not been created, when the
// local file cannot be opened for reading, or when the PutObject request
// fails; Status::OK() otherwise.
Status
S3ClientWrapper::UploadFile(std::string &BucketName, std::string &objectKey, std::string &pathkey) {
    if (client_ == nullptr) {
        std::string error = "S3 client is not created, call Create() first.";
        return Status::Error(error);
    }

    Aws::S3::Model::PutObjectRequest putObjectRequest;
    putObjectRequest.WithBucket(BucketName.c_str()).WithKey(objectKey.c_str());

    auto input_data = Aws::MakeShared<Aws::FStream>("PutObjectInputStream",
                                                    pathkey.c_str(),
                                                    std::ios_base::in | std::ios_base::binary);
    // Refuse to send a request whose body stream failed to open; otherwise
    // an unreadable path would go to the service with a failed stream as body.
    if (!input_data->good()) {
        std::string error = "Can't open file for upload: " + pathkey;
        std::cout << "PutObject error: " << error << std::endl;
        return Status::Error(error);
    }

    putObjectRequest.SetBody(input_data);

    auto put_object_result = client_->PutObject(putObjectRequest);
    if (put_object_result.IsSuccess()) {
        return Status::OK();
    }

    std::cout << "PutObject error: " << put_object_result.GetError().GetExceptionName() << " "
              << put_object_result.GetError().GetMessage() << std::endl;
    return Status::Error(put_object_result.GetError().GetMessage());
}
// Download object objectKey from bucket BucketName into the local file pathkey.
//
// Returns Status::Error() when the client has not been created, when the
// GetObject request fails, or when the local file cannot be opened or
// flushed; Status::OK() otherwise.
Status
S3ClientWrapper::DownloadFile(std::string &BucketName, std::string &objectKey, std::string &pathkey) {
    if (client_ == nullptr) {
        std::string error = "S3 client is not created, call Create() first.";
        return Status::Error(error);
    }

    Aws::S3::Model::GetObjectRequest object_request;
    object_request.WithBucket(BucketName.c_str()).WithKey(objectKey.c_str());

    auto get_object_outcome = client_->GetObject(object_request);
    if (!get_object_outcome.IsSuccess()) {
        std::cout << "GetObject error: " << get_object_outcome.GetError().GetExceptionName() << " "
                  << get_object_outcome.GetError().GetMessage() << std::endl;
        return Status::Error(get_object_outcome.GetError().GetMessage());
    }

    Aws::OFStream local_file(pathkey.c_str(), std::ios::out | std::ios::binary);
    if (!local_file.is_open()) {
        std::string error = "Can't open local file for download: " + pathkey;
        std::cout << "GetObject error: " << error << std::endl;
        return Status::Error(error);
    }

    local_file << get_object_outcome.GetResult().GetBody().rdbuf();

    // Only badbit is treated as failure: streaming an empty object sets
    // failbit (no characters inserted), which is not an error here.
    local_file.flush();
    if (local_file.bad()) {
        std::string error = "Can't write local file for download: " + pathkey;
        std::cout << "GetObject error: " << error << std::endl;
        return Status::Error(error);
    }

    return Status::OK();
}
// Delete object object_key from bucket bucket_name.
//
// Returns Status::OK() on success and Status::Error() with the service
// message on failure, or when the client has not been created yet.
Status
S3ClientWrapper::DeleteFile(std::string &bucket_name, std::string &object_key) {
    if (client_ == nullptr) {
        std::string error = "S3 client is not created, call Create() first.";
        return Status::Error(error);
    }

    Aws::S3::Model::DeleteObjectRequest object_request;
    object_request.WithBucket(bucket_name).WithKey(object_key);

    auto delete_object_outcome = client_->DeleteObject(object_request);
    if (delete_object_outcome.IsSuccess()) {
        return Status::OK();
    }

    std::cout << "DeleteObject error: " <<
              delete_object_outcome.GetError().GetExceptionName() << " " <<
              delete_object_outcome.GetError().GetMessage() << std::endl;
    return Status::Error(delete_object_outcome.GetError().GetMessage());
}
}
}
}
}

View File

@ -0,0 +1,45 @@
#pragma once
#include "storage/IStorage.h"
#include <aws/s3/S3Client.h>
#include <aws/core/Aws.h>
#include <aws/core/auth/AWSCredentialsProvider.h>
using namespace Aws::S3;
using namespace Aws::S3::Model;
namespace zilliz {
namespace milvus {
namespace engine {
namespace storage {
// IStorage implementation backed by the AWS S3 SDK client.
//
// Lifecycle: Create() builds the client, Close() destroys it. The destructor
// does not release anything, so callers must call Close() themselves.
class S3ClientWrapper : public IStorage {
 public:
    S3ClientWrapper() = default;
    ~S3ClientWrapper() = default;

    // The wrapper owns client_ through a raw pointer; a copy would share that
    // pointer and Close() on both copies would delete it twice.
    S3ClientWrapper(const S3ClientWrapper &) = delete;
    S3ClientWrapper &operator=(const S3ClientWrapper &) = delete;

    Status Create(const std::string &ip_address,
                  const std::string &port,
                  const std::string &access_key,
                  const std::string &secret_key) override;
    Status Close() override;

    Status CreateBucket(std::string& bucket_name) override;
    Status DeleteBucket(std::string& bucket_name) override;
    Status UploadFile(std::string &BucketName, std::string &objectKey, std::string &pathkey) override;
    Status DownloadFile(std::string &BucketName, std::string &objectKey, std::string &pathkey) override;
    Status DeleteFile(std::string &bucket_name, std::string &object_key) override;

 private:
    // Owned S3 client; nullptr until Create() succeeds and again after Close().
    Aws::S3::S3Client *client_ = nullptr;
    // SDK options handed to Aws::InitAPI / Aws::ShutdownAPI.
    Aws::SDKOptions options_;
};
}
}
}
}

View File

@ -1,4 +1,4 @@
#!/bin/bash
thrift -r --gen cpp ./megasearch.thrift
thrift -r --gen cpp ./milvus.thrift

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -1,7 +1,7 @@
// This autogenerated skeleton file illustrates how to build a server.
// You should copy it to another filename to avoid overwriting it.
#include "MegasearchService.h"
#include "MilvusService.h"
#include <thrift/protocol/TBinaryProtocol.h>
#include <thrift/server/TSimpleServer.h>
#include <thrift/transport/TServerSocket.h>
@ -12,11 +12,11 @@ using namespace ::apache::thrift::protocol;
using namespace ::apache::thrift::transport;
using namespace ::apache::thrift::server;
using namespace ::megasearch::thrift;
using namespace ::milvus::thrift;
class MegasearchServiceHandler : virtual public MegasearchServiceIf {
class MilvusServiceHandler : virtual public MilvusServiceIf {
public:
MegasearchServiceHandler() {
MilvusServiceHandler() {
// Your initialization goes here
}
@ -90,6 +90,29 @@ class MegasearchServiceHandler : virtual public MegasearchServiceIf {
printf("SearchVector\n");
}
/**
* @brief Internal use query interface
*
* This method is used to query vector in specified files.
*
* @param file_id_array, specified files id array, queried.
* @param query_record_array, all vector are going to be queried.
* @param query_range_array, optional ranges for conditional search. If not specified, search whole table
* @param topk, how many similarity vectors will be searched.
*
* @return query result array.
*
* @param table_name
* @param file_id_array
* @param query_record_array
* @param query_range_array
* @param topk
*/
void SearchVectorInFiles(std::vector<TopKQueryResult> & _return, const std::string& table_name, const std::vector<std::string> & file_id_array, const std::vector<RowRecord> & query_record_array, const std::vector<Range> & query_range_array, const int64_t topk) {
// Your implementation goes here
printf("SearchVectorInFiles\n");
}
/**
* @brief Get table schema
*
@ -153,8 +176,8 @@ class MegasearchServiceHandler : virtual public MegasearchServiceIf {
int main(int argc, char **argv) {
int port = 9090;
::apache::thrift::stdcxx::shared_ptr<MegasearchServiceHandler> handler(new MegasearchServiceHandler());
::apache::thrift::stdcxx::shared_ptr<TProcessor> processor(new MegasearchServiceProcessor(handler));
::apache::thrift::stdcxx::shared_ptr<MilvusServiceHandler> handler(new MilvusServiceHandler());
::apache::thrift::stdcxx::shared_ptr<TProcessor> processor(new MilvusServiceProcessor(handler));
::apache::thrift::stdcxx::shared_ptr<TServerTransport> serverTransport(new TServerSocket(port));
::apache::thrift::stdcxx::shared_ptr<TTransportFactory> transportFactory(new TBufferedTransportFactory());
::apache::thrift::stdcxx::shared_ptr<TProtocolFactory> protocolFactory(new TBinaryProtocolFactory());

View File

@ -1,17 +0,0 @@
/**
* Autogenerated by Thrift Compiler (0.12.0)
*
* DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING
* @generated
*/
#include "megasearch_constants.h"
namespace megasearch { namespace thrift {
const megasearchConstants g_megasearch_constants;
megasearchConstants::megasearchConstants() {
}
}} // namespace

View File

@ -1,24 +0,0 @@
/**
* Autogenerated by Thrift Compiler (0.12.0)
*
* DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING
* @generated
*/
#ifndef megasearch_CONSTANTS_H
#define megasearch_CONSTANTS_H
#include "megasearch_types.h"
namespace megasearch { namespace thrift {
class megasearchConstants {
public:
megasearchConstants();
};
extern const megasearchConstants g_megasearch_constants;
}} // namespace
#endif

View File

@ -0,0 +1,17 @@
/**
* Autogenerated by Thrift Compiler (0.12.0)
*
* DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING
* @generated
*/
#include "milvus_constants.h"
namespace milvus { namespace thrift {
const milvusConstants g_milvus_constants;
milvusConstants::milvusConstants() {
}
}} // namespace

View File

@ -0,0 +1,24 @@
/**
* Autogenerated by Thrift Compiler (0.12.0)
*
* DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING
* @generated
*/
#ifndef milvus_CONSTANTS_H
#define milvus_CONSTANTS_H
#include "milvus_types.h"
namespace milvus { namespace thrift {
class milvusConstants {
public:
milvusConstants();
};
extern const milvusConstants g_milvus_constants;
}} // namespace
#endif

View File

@ -4,14 +4,14 @@
* DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING
* @generated
*/
#include "megasearch_types.h"
#include "milvus_types.h"
#include <algorithm>
#include <ostream>
#include <thrift/TToString.h>
namespace megasearch { namespace thrift {
namespace milvus { namespace thrift {
int _kErrorCodeValues[] = {
ErrorCode::SUCCESS,

View File

@ -4,8 +4,8 @@
* DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING
* @generated
*/
#ifndef megasearch_TYPES_H
#define megasearch_TYPES_H
#ifndef milvus_TYPES_H
#define milvus_TYPES_H
#include <iosfwd>
@ -18,7 +18,7 @@
#include <thrift/stdcxx.h>
namespace megasearch { namespace thrift {
namespace milvus { namespace thrift {
struct ErrorCode {
enum type {

Some files were not shown because too many files have changed in this diff Show More