diff --git a/cpp/CHANGELOG.md b/cpp/CHANGELOG.md
index 85c488ebde..11fca0d6e9 100644
--- a/cpp/CHANGELOG.md
+++ b/cpp/CHANGELOG.md
@@ -27,6 +27,7 @@ Please mark all change in change log and use the ticket from JIRA.
 - MS-45 - Implement DeleteTable interface
 - MS-75 - cmake: change faiss version to 1.5.2; add CUDA gencode
 - MS-81 - fix faiss ptx issue; change cuda gencode
+- MS-84 - cmake: add arrow, jemalloc and jsoncons third party; default build option OFF
 
 ## Task
 - MS-74 - Change README.md in cpp
diff --git a/cpp/cmake/DefineOptions.cmake b/cpp/cmake/DefineOptions.cmake
index 538e54fe3b..d72ea9ca5a 100644
--- a/cpp/cmake/DefineOptions.cmake
+++ b/cpp/cmake/DefineOptions.cmake
@@ -55,6 +55,8 @@ define_option_string(MEGASEARCH_DEPENDENCY_SOURCE
 define_option(MEGASEARCH_VERBOSE_THIRDPARTY_BUILD
         "Show output from ExternalProjects rather than just logging to files" ON)
 
+define_option(MEGASEARCH_WITH_ARROW "Build with ARROW" OFF)
+
 define_option(MEGASEARCH_BOOST_USE_SHARED "Rely on boost shared libraries where relevant" OFF)
 
 define_option(MEGASEARCH_BOOST_VENDORED "Use vendored Boost instead of existing Boost. \
@@ -77,6 +79,8 @@ define_option(MEGASEARCH_WITH_LAPACK "Build with LAPACK library" ON)
 
 define_option(MEGASEARCH_WITH_LZ4 "Build with lz4 compression" ON)
 
+define_option(MEGASEARCH_WITH_JSONCONS "Build with JSONCONS" OFF)
+
 define_option(MEGASEARCH_WITH_OPENBLAS "Build with OpenBLAS library" ON)
 
 define_option(MEGASEARCH_WITH_PROMETHEUS "Build with PROMETHEUS library" ON)
diff --git a/cpp/cmake/ThirdPartyPackages.cmake b/cpp/cmake/ThirdPartyPackages.cmake
index ca84bca7b0..25a9a5077f 100644
--- a/cpp/cmake/ThirdPartyPackages.cmake
+++ b/cpp/cmake/ThirdPartyPackages.cmake
@@ -17,11 +17,13 @@
 
 
 set(MEGASEARCH_THIRDPARTY_DEPENDENCIES
+        ARROW
         BOOST
         BZip2
         Easylogging++
         FAISS
         GTest
+        JSONCONS
         LAPACK
         Lz4
         OpenBLAS
@@ -45,7 +47,9 @@ foreach(DEPENDENCY ${MEGASEARCH_THIRDPARTY_DEPENDENCIES})
 endforeach()
 
 macro(build_dependency DEPENDENCY_NAME)
-    if("${DEPENDENCY_NAME}" STREQUAL "BZip2")
+    if("${DEPENDENCY_NAME}" STREQUAL "ARROW")
+        build_arrow()
+    elseif("${DEPENDENCY_NAME}" STREQUAL "BZip2")
         build_bzip2()
     elseif("${DEPENDENCY_NAME}" STREQUAL "Easylogging++")
         build_easyloggingpp()
@@ -57,6 +61,8 @@ macro(build_dependency DEPENDENCY_NAME)
         build_lz4()
     elseif ("${DEPENDENCY_NAME}" STREQUAL "GTest")
         build_gtest()
+    elseif ("${DEPENDENCY_NAME}" STREQUAL "JSONCONS")
+        build_jsoncons()
     elseif ("${DEPENDENCY_NAME}" STREQUAL "OpenBLAS")
         build_openblas()
     elseif ("${DEPENDENCY_NAME}" STREQUAL "Prometheus")
@@ -196,6 +202,14 @@ foreach(_VERSION_ENTRY ${TOOLCHAIN_VERSIONS_TXT})
     set(${_LIB_NAME} "${_LIB_VERSION}")
 endforeach()
 
+if(DEFINED ENV{MEGASEARCH_ARROW_URL})
+    set(ARROW_SOURCE_URL "$ENV{MEGASEARCH_ARROW_URL}")
+else()
+    set(ARROW_SOURCE_URL
+            "https://github.com/youny626/arrow.git"
+            )
+endif()
+
 if(DEFINED ENV{MEGASEARCH_BOOST_URL})
     set(BOOST_SOURCE_URL "$ENV{MEGASEARCH_BOOST_URL}")
 else()
@@ -230,6 +244,13 @@ else ()
             "https://github.com/google/googletest/archive/release-${GTEST_VERSION}.tar.gz")
 endif()
 
+if (DEFINED ENV{MEGASEARCH_JSONCONS_URL})
+    set(JSONCONS_SOURCE_URL "$ENV{MEGASEARCH_JSONCONS_URL}")
+else ()
+    set(JSONCONS_SOURCE_URL
+            "https://github.com/danielaparker/jsoncons/archive/v${JSONCONS_VERSION}.tar.gz")
+endif()
+
 if(DEFINED ENV{MEGASEARCH_LAPACK_URL})
     set(LAPACK_SOURCE_URL "$ENV{MEGASEARCH_LAPACK_URL}")
 else()
@@ -310,6 +331,93 @@ else()
     set(ZSTD_SOURCE_URL "https://github.com/facebook/zstd/archive/${ZSTD_VERSION}.tar.gz")
 endif()
 
+# ----------------------------------------------------------------------
+# ARROW
+
+# build_arrow()
+#
+# Builds Apache Arrow (static library, Parquet enabled) through ExternalProject
+# from the git repository ${ARROW_SOURCE_URL} at ref ${ARROW_VERSION}, and
+# exposes the result as the imported target `arrow`.
+#
+# Being a macro, it sets these variables in the caller's scope:
+# ARROW_PREFIX, ARROW_STATIC_LIB_NAME, ARROW_STATIC_LIB, ARROW_INCLUDE_DIR,
+# ARROW_CMAKE_ARGS and JEMALLOC_PREFIX.
+macro(build_arrow)
+    message(STATUS "Building Apache ARROW-${ARROW_VERSION} from source")
+    # Arrow is configured from the `cpp` subdirectory of its checkout and
+    # installed back into that same directory (see CMAKE_INSTALL_PREFIX).
+    set(ARROW_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/arrow_ep-prefix/src/arrow_ep/cpp")
+    set(ARROW_STATIC_LIB_NAME arrow)
+    set(ARROW_STATIC_LIB
+            "${ARROW_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}${ARROW_STATIC_LIB_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX}"
+            )
+    set(ARROW_INCLUDE_DIR "${ARROW_PREFIX}/include")
+
+    set(ARROW_CMAKE_ARGS
+            ${EP_COMMON_CMAKE_ARGS}
+            -DARROW_BUILD_STATIC=ON
+            -DARROW_BUILD_SHARED=OFF
+            -DARROW_PARQUET=ON
+            -DARROW_USE_GLOG=OFF
+            -DCMAKE_INSTALL_PREFIX=${ARROW_PREFIX}
+            "-DCMAKE_LIBRARY_PATH=${CUDA_TOOLKIT_ROOT_DIR}/lib64/stubs"
+            -DCMAKE_BUILD_TYPE=Release)
+
+    externalproject_add(arrow_ep
+            GIT_REPOSITORY
+            ${ARROW_SOURCE_URL}
+            GIT_TAG
+            ${ARROW_VERSION}
+            GIT_SHALLOW
+            TRUE
+            SOURCE_SUBDIR
+            cpp
+            ${EP_LOG_OPTIONS}
+            CMAKE_ARGS
+            ${ARROW_CMAKE_ARGS}
+            BUILD_COMMAND
+            ${MAKE}
+            ${MAKE_BUILD_ARGS}
+            INSTALL_COMMAND
+            ${MAKE} install
+            BUILD_BYPRODUCTS
+            "${ARROW_STATIC_LIB}"
+            )
+
+    # Create the include directory up front: CMake rejects an imported target
+    # whose INTERFACE_INCLUDE_DIRECTORIES names a path that does not exist yet.
+    file(MAKE_DIRECTORY "${ARROW_PREFIX}/include")
+    add_library(arrow STATIC IMPORTED)
+    set_target_properties(arrow
+            PROPERTIES IMPORTED_LOCATION "${ARROW_STATIC_LIB}"
+            INTERFACE_INCLUDE_DIRECTORIES "${ARROW_INCLUDE_DIR}")
+    add_dependencies(arrow arrow_ep)
+
+    # Where libjemalloc_pic.a is expected inside the Arrow build tree
+    # (presumably produced by Arrow's bundled jemalloc build -- confirm
+    # against the Arrow revision in use).
+    set(JEMALLOC_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/arrow_ep-prefix/src/arrow_ep-build/jemalloc_ep-prefix/src/jemalloc_ep")
+
+    # Copy jemalloc next to libarrow once arrow_ep has run. The TARGET form of
+    # add_custom_command() accepts no DEPENDS clause, so none is given.
+    add_custom_command(TARGET arrow_ep POST_BUILD
+            COMMAND ${CMAKE_COMMAND} -E make_directory ${ARROW_PREFIX}/lib/
+            COMMAND ${CMAKE_COMMAND} -E copy ${JEMALLOC_PREFIX}/lib/libjemalloc_pic.a ${ARROW_PREFIX}/lib/
+            VERBATIM)
+
+endmacro()
+
+if(MEGASEARCH_WITH_ARROW)
+
+    resolve_dependency(ARROW)
+
+    # link_directories() only understands AFTER/BEFORE; it has no SYSTEM
+    # keyword, so "SYSTEM" would be taken as a directory name.
+    link_directories(${ARROW_PREFIX}/lib/)
+    include_directories(SYSTEM ${ARROW_INCLUDE_DIR})
+endif()
+
 # ----------------------------------------------------------------------
 # Add Boost dependencies (code adapted from Apache Kudu (incubating))
 
@@ -849,6 +957,48 @@ if (MEGASEARCH_BUILD_TESTS)
     include_directories(SYSTEM ${GTEST_INCLUDE_DIR})
 endif()
 
+# ----------------------------------------------------------------------
+# JSONCONS
+
+# build_jsoncons()
+#
+# Downloads the jsoncons archive from ${JSONCONS_SOURCE_URL} at configure
+# time and unpacks it under the build tree. No compile step is run here; the
+# macro only makes JSONCONS_INCLUDE_DIR available. The download is skipped
+# when that include directory already exists.
+#
+# Being a macro, it sets JSONCONS_PREFIX, JSONCONS_TAR_NAME and
+# JSONCONS_INCLUDE_DIR (and its status variables) in the caller's scope.
+macro(build_jsoncons)
+    message(STATUS "Building JSONCONS-${JSONCONS_VERSION} from source")
+
+    set(JSONCONS_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/jsoncons_ep-prefix")
+    set(JSONCONS_TAR_NAME "${JSONCONS_PREFIX}/jsoncons-${JSONCONS_VERSION}.tar.gz")
+    set(JSONCONS_INCLUDE_DIR "${JSONCONS_PREFIX}/jsoncons-${JSONCONS_VERSION}/include")
+    if (NOT EXISTS ${JSONCONS_INCLUDE_DIR})
+        file(MAKE_DIRECTORY ${JSONCONS_PREFIX})
+        file(DOWNLOAD ${JSONCONS_SOURCE_URL}
+                ${JSONCONS_TAR_NAME}
+                STATUS JSONCONS_DOWNLOAD_STATUS)
+        # STATUS is a two-element list: numeric code (0 on success) and message.
+        list(GET JSONCONS_DOWNLOAD_STATUS 0 JSONCONS_DOWNLOAD_CODE)
+        if (NOT JSONCONS_DOWNLOAD_CODE EQUAL 0)
+            message(FATAL_ERROR "Failed to download ${JSONCONS_SOURCE_URL}: ${JSONCONS_DOWNLOAD_STATUS}")
+        endif ()
+        execute_process(COMMAND ${CMAKE_COMMAND} -E tar -xf ${JSONCONS_TAR_NAME}
+                WORKING_DIRECTORY ${JSONCONS_PREFIX}
+                RESULT_VARIABLE JSONCONS_EXTRACT_RESULT)
+        if (NOT JSONCONS_EXTRACT_RESULT EQUAL 0)
+            message(FATAL_ERROR "Failed to extract ${JSONCONS_TAR_NAME}")
+        endif ()
+    endif ()
+endmacro()
+
+if(MEGASEARCH_WITH_JSONCONS)
+    resolve_dependency(JSONCONS)
+    include_directories(SYSTEM "${JSONCONS_INCLUDE_DIR}")
+endif()
+
 # ----------------------------------------------------------------------
 # lz4
 
@@ -1201,16 +1351,16 @@ macro(build_sqlite_orm)
     message(STATUS "Building SQLITE_ORM-${SQLITE_ORM_VERSION} from source")
 
     set(SQLITE_ORM_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/sqlite_orm_ep-prefix")
-    set(SQLITE_ORM_TAR_NAME "${SQLITE_ORM_PREFIX}/sqlite_orm-${SQLITE_ORM_VERSION}.tar.gz") #sqlite_orm-${SQLITE_ORM_VERSION}.tar.gz
-    if (NOT EXISTS ${SQLITE_ORM_TAR_NAME})
+    set(SQLITE_ORM_TAR_NAME "${SQLITE_ORM_PREFIX}/sqlite_orm-${SQLITE_ORM_VERSION}.tar.gz")
+    set(SQLITE_ORM_INCLUDE_DIR "${SQLITE_ORM_PREFIX}/sqlite_orm-${SQLITE_ORM_VERSION}/include/sqlite_orm")
+    if (NOT EXISTS ${SQLITE_ORM_INCLUDE_DIR})
         file(MAKE_DIRECTORY ${SQLITE_ORM_PREFIX})
-        file(DOWNLOAD https://github.com/fnc12/sqlite_orm/archive/${SQLITE_ORM_VERSION}.tar.gz
+        file(DOWNLOAD ${SQLITE_ORM_SOURCE_URL}
                 ${SQLITE_ORM_TAR_NAME})
         execute_process(COMMAND ${CMAKE_COMMAND} -E tar -xf ${SQLITE_ORM_TAR_NAME}
                 WORKING_DIRECTORY ${SQLITE_ORM_PREFIX})
 
     endif ()
-    set(SQLITE_ORM_INCLUDE_DIR "${SQLITE_ORM_PREFIX}/sqlite_orm-${SQLITE_ORM_VERSION}/include/sqlite_orm")
 
     #set(SQLITE_ORM_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/sqlite_orm_ep-prefix/src/sqlite_orm_ep")
     #set(SQLITE_ORM_INCLUDE_DIR "${SQLITE_ORM_PREFIX}/include/sqlite_orm")
diff --git a/cpp/src/CMakeLists.txt b/cpp/src/CMakeLists.txt
index 56b3e5e139..ac5644dda8 100644
--- a/cpp/src/CMakeLists.txt
+++ b/cpp/src/CMakeLists.txt
@@ -58,6 +58,15 @@ include_directories("${CUDA_TOOLKIT_ROOT_DIR}/include")
 include_directories(thrift/gen-cpp)
 
+# build_arrow() creates the imported `arrow` target only when
+# MEGASEARCH_WITH_ARROW is ON (the option defaults to OFF). Link it only in
+# that case; otherwise the name would degrade to a bare `-larrow`.
+set(arrow_libs "")
+if (MEGASEARCH_WITH_ARROW)
+    set(arrow_libs arrow)
+endif ()
+
 set(third_party_libs
+        ${arrow_libs}
         easyloggingpp
         sqlite
         thrift
diff --git a/cpp/thirdparty/versions.txt b/cpp/thirdparty/versions.txt
index 6c290f9d1e..dde2d8bb03 100644
--- a/cpp/thirdparty/versions.txt
+++ b/cpp/thirdparty/versions.txt
@@ -1,33 +1,10 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-# Toolchain library versions
-#
-# This file is used by `download_dependencies.sh` and cmake to figure out which
-# version of a dependency to fetch. In order to add a new dependency, add a
-# version variable, e.g. MY_DEP_VERSION and append an entry in the
-# `DEPENDENCIES` array (see the comment on top of the declaration for the
-# format).
-
+ARROW_VERSION=zilliz
 BOOST_VERSION=1.70.0
 BZIP2_VERSION=1.0.6
 EASYLOGGINGPP_VERSION=v9.96.7
 FAISS_VERSION=v1.5.2
 GTEST_VERSION=1.8.1
+JSONCONS_VERSION=0.126.0
 LAPACK_VERSION=v3.8.0
 LZ4_VERSION=v1.9.1
 OPENBLAS_VERSION=v0.3.6
@@ -41,28 +18,4 @@ YAMLCPP_VERSION=0.6.2
 ZLIB_VERSION=v1.2.11
 ZSTD_VERSION=v1.4.0
 
-# The first field is the name of the environment variable expected by cmake.
-# This _must_ match what is defined. The second field is the name of the
-# generated archive file. The third field is the url of the project for the
-# given version.
-DEPENDENCIES=(
-  "MEGASEARCH_BOOST_URL boost-${BOOST_VERSION}.tar.gz https://dl.bintray.com/boostorg/release/${BOOST_VERSION}/source/boost_${BOOST_VERSION//./_}.tar.gz"
-  "MEGASEARCH_BZIP2_URL bzip2-${BZIP2_VERSION}.tar.gz https://fossies.org/linux/misc/bzip2-${BZIP2_VERSION}.tar.gz"
-  "MEGASEARCH_EASYLOGGINGPP_URL easyloggingpp-${EASYLOGGINGPP_VERSION}.tar.gz https://github.com/zuhd-org/easyloggingpp/archive/${EASYLOGGINGPP_VERSION}.tar.gz"
-  "MEGASEARCH_FAISS_URL faiss-${FAISS_VERSION}.tar.gz https://github.com/facebookresearch/faiss/archive/${FAISS_VERSION}.tar.gz"
-  "MEGASEARCH_GTEST_URL gtest-${GTEST_VERSION}.tar.gz https://github.com/google/googletest/archive/release-${GTEST_VERSION}.tar.gz"
-  "MEGASEARCH_LAPACK_URL lapack-${LAPACK_VERSION}.tar.gz https://github.com/Reference-LAPACK/lapack/archive/${LAPACK_VERSION}.tar.gz
-  "MEGASEARCH_LZ4_URL lz4-${LZ4_VERSION}.tar.gz https://github.com/lz4/lz4/archive/${LZ4_VERSION}.tar.gz"
-  "MEGASEARCH_OPENBLAS_URL openblas-${OPENBLAS_VERSION}.tar.gz https://github.com/xianyi/OpenBLAS/archive/${OPENBLAS_VERSION}.tar.gz"
-  "MEGASEARCH_PROMETHEUS_URL https://github.com/jupp0r/prometheus-cpp.git"
-  "MEGASEARCH_ROCKSDB_URL rocksdb-${ROCKSDB_VERSION}.tar.gz https://github.com/facebook/rocksdb/archive/${ROCKSDB_VERSION}.tar.gz"
-  "MEGASEARCH_SNAPPY_URL snappy-${SNAPPY_VERSION}.tar.gz https://github.com/google/snappy/archive/${SNAPPY_VERSION}.tar.gz"
-  "MEGASEARCH_SQLITE_URL sqlite-autoconf-${SQLITE_VERSION}.tar.gz https://www.sqlite.org/2019/sqlite-autoconf-${SQLITE_VERSION}.tar.gz"
-  "MEGASEARCH_SQLITE_ORM_URL sqlite_orm-${SQLITE_ORM_VERSION}.tar.gz https://github.com/fnc12/sqlite_orm/archive/${SQLITE_ORM_VERSION}.tar.gz"
-  "MEGASEARCH_THRIFT_URL thrift-${THRIFT_VERSION}.tar.gz https://github.com/apache/thrift/archive/${THRIFT_VERSION}.tar.gz"
-  "MEGASEARCH_YAMLCPP_URL yaml-cpp-${YAMLCPP_VERSION}.tar.gz https://github.com/jbeder/yaml-cpp/archive/yaml-cpp-${YAMLCPP_VERSION}.tar.gz"
-  "MEGASEARCH_ZLIB_URL zlib-${ZLIB_VERSION}.tar.gz https://github.com/madler/zlib/archive/${ZLIB_VERSION}.tar.gz"
-  "MEGASEARCH_ZSTD_URL zstd-${ZSTD_VERSION}.tar.gz https://github.com/facebook/zstd/archive/${ZSTD_VERSION}.tar.gz"
-  )
-
 # vim: set filetype=sh:
\ No newline at end of file