Merge branch 'arrow-0.3.0' into 'branch-0.3.0'

MS-84 : cmake: add arrow, jemalloc and jsoncons third party; default build option OFF

See merge request megasearch/vecwise_engine!89

Former-commit-id: 06112845de295b7fc05fe44306882a54f209af99
This commit is contained in:
jinhai 2019-06-14 10:50:49 +08:00
commit 4ba3a0707c
5 changed files with 145 additions and 54 deletions

View File

@ -27,6 +27,7 @@ Please mark all change in change log and use the ticket from JIRA.
- MS-45 - Implement DeleteTable interface
- MS-75 - cmake: change faiss version to 1.5.2; add CUDA gencode
- MS-81 - fix faiss ptx issue; change cuda gencode
- MS-84 - cmake: add arrow, jemalloc and jsoncons third party; default build option OFF
## Task
- MS-74 - Change README.md in cpp

View File

@ -55,6 +55,8 @@ define_option_string(MEGASEARCH_DEPENDENCY_SOURCE
define_option(MEGASEARCH_VERBOSE_THIRDPARTY_BUILD
"Show output from ExternalProjects rather than just logging to files" ON)
define_option(MEGASEARCH_WITH_ARROW "Build with ARROW" OFF)
define_option(MEGASEARCH_BOOST_USE_SHARED "Rely on boost shared libraries where relevant" OFF)
define_option(MEGASEARCH_BOOST_VENDORED "Use vendored Boost instead of existing Boost. \
@ -77,6 +79,8 @@ define_option(MEGASEARCH_WITH_LAPACK "Build with LAPACK library" ON)
define_option(MEGASEARCH_WITH_LZ4 "Build with lz4 compression" ON)
define_option(MEGASEARCH_WITH_JSONCONS "Build with JSONCONS" OFF)
define_option(MEGASEARCH_WITH_OPENBLAS "Build with OpenBLAS library" ON)
define_option(MEGASEARCH_WITH_PROMETHEUS "Build with PROMETHEUS library" ON)

View File

@ -17,11 +17,13 @@
set(MEGASEARCH_THIRDPARTY_DEPENDENCIES
ARROW
BOOST
BZip2
Easylogging++
FAISS
GTest
JSONCONS
LAPACK
Lz4
OpenBLAS
@ -45,7 +47,9 @@ foreach(DEPENDENCY ${MEGASEARCH_THIRDPARTY_DEPENDENCIES})
endforeach()
macro(build_dependency DEPENDENCY_NAME)
if("${DEPENDENCY_NAME}" STREQUAL "BZip2")
if("${DEPENDENCY_NAME}" STREQUAL "ARROW")
build_arrow()
elseif("${DEPENDENCY_NAME}" STREQUAL "BZip2")
build_bzip2()
elseif("${DEPENDENCY_NAME}" STREQUAL "Easylogging++")
build_easyloggingpp()
@ -57,6 +61,8 @@ macro(build_dependency DEPENDENCY_NAME)
build_lz4()
elseif ("${DEPENDENCY_NAME}" STREQUAL "GTest")
build_gtest()
elseif ("${DEPENDENCY_NAME}" STREQUAL "JSONCONS")
build_jsoncons()
elseif ("${DEPENDENCY_NAME}" STREQUAL "OpenBLAS")
build_openblas()
elseif ("${DEPENDENCY_NAME}" STREQUAL "Prometheus")
@ -196,6 +202,14 @@ foreach(_VERSION_ENTRY ${TOOLCHAIN_VERSIONS_TXT})
set(${_LIB_NAME} "${_LIB_VERSION}")
endforeach()
if(DEFINED ENV{MEGASEARCH_ARROW_URL})
set(ARROW_SOURCE_URL "$ENV{MEGASEARCH_ARROW_URL}")
else()
set(ARROW_SOURCE_URL
"https://github.com/youny626/arrow.git"
)
endif()
if(DEFINED ENV{MEGASEARCH_BOOST_URL})
set(BOOST_SOURCE_URL "$ENV{MEGASEARCH_BOOST_URL}")
else()
@ -230,6 +244,13 @@ else ()
"https://github.com/google/googletest/archive/release-${GTEST_VERSION}.tar.gz")
endif()
if (DEFINED ENV{MEGASEARCH_JSONCONS_URL})
set(JSONCONS_SOURCE_URL "$ENV{MEGASEARCH_JSONCONS_URL}")
else ()
set(JSONCONS_SOURCE_URL
"https://github.com/danielaparker/jsoncons/archive/v${JSONCONS_VERSION}.tar.gz")
endif()
if(DEFINED ENV{MEGASEARCH_LAPACK_URL})
set(LAPACK_SOURCE_URL "$ENV{MEGASEARCH_LAPACK_URL}")
else()
@ -310,6 +331,93 @@ else()
set(ZSTD_SOURCE_URL "https://github.com/facebook/zstd/archive/${ZSTD_VERSION}.tar.gz")
endif()
# ----------------------------------------------------------------------
# ARROW
macro(build_arrow)
message(STATUS "Building Apache ARROW-${ARROW_VERSION} from source")
set(ARROW_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/arrow_ep-prefix/src/arrow_ep/cpp")
set(ARROW_STATIC_LIB_NAME arrow)
# set(ARROW_CUDA_STATIC_LIB_NAME arrow_cuda)
set(ARROW_STATIC_LIB
"${ARROW_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}${ARROW_STATIC_LIB_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX}"
)
# set(ARROW_CUDA_STATIC_LIB
# "${ARROW_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}${ARROW_CUDA_STATIC_LIB_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX}"
# )
set(ARROW_INCLUDE_DIR "${ARROW_PREFIX}/include")
set(ARROW_CMAKE_ARGS
${EP_COMMON_CMAKE_ARGS}
# "-DARROW_THRIFT_URL=${THRIFT_SOURCE_URL}"
#"env ARROW_THRIFT_URL=${THRIFT_SOURCE_URL}"
-DARROW_BUILD_STATIC=ON
-DARROW_BUILD_SHARED=OFF
-DARROW_PARQUET=ON
-DARROW_USE_GLOG=OFF
-DCMAKE_INSTALL_PREFIX=${ARROW_PREFIX}
"-DCMAKE_LIBRARY_PATH=${CUDA_TOOLKIT_ROOT_DIR}/lib64/stubs"
-DCMAKE_BUILD_TYPE=Release)
# set($ENV{ARROW_THRIFT_URL} ${THRIFT_SOURCE_URL})
externalproject_add(arrow_ep
GIT_REPOSITORY
${ARROW_SOURCE_URL}
GIT_TAG
${ARROW_VERSION}
GIT_SHALLOW
TRUE
# SOURCE_DIR
# ${ARROW_PREFIX}
# BINARY_DIR
# ${ARROW_PREFIX}
SOURCE_SUBDIR
cpp
# COMMAND
# "export \"ARROW_THRIFT_URL=${THRIFT_SOURCE_URL}\""
${EP_LOG_OPTIONS}
CMAKE_ARGS
${ARROW_CMAKE_ARGS}
BUILD_COMMAND
${MAKE}
${MAKE_BUILD_ARGS}
INSTALL_COMMAND
${MAKE} install
# BUILD_IN_SOURCE
# 1
BUILD_BYPRODUCTS
"${ARROW_STATIC_LIB}"
# "${ARROW_CUDA_STATIC_LIB}"
)
# ExternalProject_Add_StepDependencies(arrow_ep build thrift_ep)
file(MAKE_DIRECTORY "${ARROW_PREFIX}/include")
add_library(arrow STATIC IMPORTED)
set_target_properties(arrow
PROPERTIES IMPORTED_LOCATION "${ARROW_STATIC_LIB}"
INTERFACE_INCLUDE_DIRECTORIES "${ARROW_INCLUDE_DIR}")
# INTERFACE_LINK_LIBRARIES thrift)
add_dependencies(arrow arrow_ep)
set(JEMALLOC_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/arrow_ep-prefix/src/arrow_ep-build/jemalloc_ep-prefix/src/jemalloc_ep")
add_custom_command(TARGET arrow_ep POST_BUILD
COMMAND ${CMAKE_COMMAND} -E make_directory ${ARROW_PREFIX}/lib/
COMMAND ${CMAKE_COMMAND} -E copy ${JEMALLOC_PREFIX}/lib/libjemalloc_pic.a ${ARROW_PREFIX}/lib/
DEPENDS ${JEMALLOC_PREFIX}/lib/libjemalloc_pic.a)
endmacro()
if(MEGASEARCH_WITH_ARROW)
resolve_dependency(ARROW)
link_directories(SYSTEM ${ARROW_PREFIX}/lib/)
include_directories(SYSTEM ${ARROW_INCLUDE_DIR})
endif()
# ----------------------------------------------------------------------
# Add Boost dependencies (code adapted from Apache Kudu (incubating))
@ -849,6 +957,30 @@ if (MEGASEARCH_BUILD_TESTS)
include_directories(SYSTEM ${GTEST_INCLUDE_DIR})
endif()
# ----------------------------------------------------------------------
# JSONCONS
macro(build_jsoncons)
message(STATUS "Building JSONCONS-${JSONCONS_VERSION} from source")
set(JSONCONS_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/jsoncons_ep-prefix")
set(JSONCONS_TAR_NAME "${JSONCONS_PREFIX}/jsoncons-${JSONCONS_VERSION}.tar.gz")
set(JSONCONS_INCLUDE_DIR "${JSONCONS_PREFIX}/jsoncons-${JSONCONS_VERSION}/include")
if (NOT EXISTS ${JSONCONS_INCLUDE_DIR})
file(MAKE_DIRECTORY ${JSONCONS_PREFIX})
file(DOWNLOAD ${JSONCONS_SOURCE_URL}
${JSONCONS_TAR_NAME})
execute_process(COMMAND ${CMAKE_COMMAND} -E tar -xf ${JSONCONS_TAR_NAME}
WORKING_DIRECTORY ${JSONCONS_PREFIX})
endif ()
endmacro()
if(MEGASEARCH_WITH_JSONCONS)
resolve_dependency(JSONCONS)
include_directories(SYSTEM "${JSONCONS_INCLUDE_DIR}")
endif()
# ----------------------------------------------------------------------
# lz4
@ -1201,16 +1333,16 @@ macro(build_sqlite_orm)
message(STATUS "Building SQLITE_ORM-${SQLITE_ORM_VERSION} from source")
set(SQLITE_ORM_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/sqlite_orm_ep-prefix")
set(SQLITE_ORM_TAR_NAME "${SQLITE_ORM_PREFIX}/sqlite_orm-${SQLITE_ORM_VERSION}.tar.gz") #sqlite_orm-${SQLITE_ORM_VERSION}.tar.gz
if (NOT EXISTS ${SQLITE_ORM_TAR_NAME})
set(SQLITE_ORM_TAR_NAME "${SQLITE_ORM_PREFIX}/sqlite_orm-${SQLITE_ORM_VERSION}.tar.gz")
set(SQLITE_ORM_INCLUDE_DIR "${SQLITE_ORM_PREFIX}/sqlite_orm-${SQLITE_ORM_VERSION}/include/sqlite_orm")
if (NOT EXISTS ${SQLITE_ORM_INCLUDE_DIR})
file(MAKE_DIRECTORY ${SQLITE_ORM_PREFIX})
file(DOWNLOAD https://github.com/fnc12/sqlite_orm/archive/${SQLITE_ORM_VERSION}.tar.gz
file(DOWNLOAD ${SQLITE_ORM_SOURCE_URL}
${SQLITE_ORM_TAR_NAME})
execute_process(COMMAND ${CMAKE_COMMAND} -E tar -xf ${SQLITE_ORM_TAR_NAME}
WORKING_DIRECTORY ${SQLITE_ORM_PREFIX})
endif ()
set(SQLITE_ORM_INCLUDE_DIR "${SQLITE_ORM_PREFIX}/sqlite_orm-${SQLITE_ORM_VERSION}/include/sqlite_orm")
#set(SQLITE_ORM_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/sqlite_orm_ep-prefix/src/sqlite_orm_ep")
#set(SQLITE_ORM_INCLUDE_DIR "${SQLITE_ORM_PREFIX}/include/sqlite_orm")

View File

@ -58,6 +58,7 @@ include_directories("${CUDA_TOOLKIT_ROOT_DIR}/include")
include_directories(thrift/gen-cpp)
set(third_party_libs
arrow
easyloggingpp
sqlite
thrift

View File

@ -1,33 +1,10 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Toolchain library versions
#
# This file is used by `download_dependencies.sh` and cmake to figure out which
# version of a dependency to fetch. In order to add a new dependency, add a
# version variable, e.g. MY_DEP_VERSION and append an entry in the
# `DEPENDENCIES` array (see the comment on top of the declaration for the
# format).
ARROW_VERSION=zilliz
BOOST_VERSION=1.70.0
BZIP2_VERSION=1.0.6
EASYLOGGINGPP_VERSION=v9.96.7
FAISS_VERSION=v1.5.2
GTEST_VERSION=1.8.1
JSONCONS_VERSION=0.126.0
LAPACK_VERSION=v3.8.0
LZ4_VERSION=v1.9.1
OPENBLAS_VERSION=v0.3.6
@ -41,28 +18,4 @@ YAMLCPP_VERSION=0.6.2
ZLIB_VERSION=v1.2.11
ZSTD_VERSION=v1.4.0
# The first field is the name of the environment variable expected by cmake.
# This _must_ match what is defined. The second field is the name of the
# generated archive file. The third field is the url of the project for the
# given version.
DEPENDENCIES=(
"MEGASEARCH_BOOST_URL boost-${BOOST_VERSION}.tar.gz https://dl.bintray.com/boostorg/release/${BOOST_VERSION}/source/boost_${BOOST_VERSION//./_}.tar.gz"
"MEGASEARCH_BZIP2_URL bzip2-${BZIP2_VERSION}.tar.gz https://fossies.org/linux/misc/bzip2-${BZIP2_VERSION}.tar.gz"
"MEGASEARCH_EASYLOGGINGPP_URL easyloggingpp-${EASYLOGGINGPP_VERSION}.tar.gz https://github.com/zuhd-org/easyloggingpp/archive/${EASYLOGGINGPP_VERSION}.tar.gz"
"MEGASEARCH_FAISS_URL faiss-${FAISS_VERSION}.tar.gz https://github.com/facebookresearch/faiss/archive/${FAISS_VERSION}.tar.gz"
"MEGASEARCH_GTEST_URL gtest-${GTEST_VERSION}.tar.gz https://github.com/google/googletest/archive/release-${GTEST_VERSION}.tar.gz"
"MEGASEARCH_LAPACK_URL lapack-${LAPACK_VERSION}.tar.gz https://github.com/Reference-LAPACK/lapack/archive/${LAPACK_VERSION}.tar.gz
"MEGASEARCH_LZ4_URL lz4-${LZ4_VERSION}.tar.gz https://github.com/lz4/lz4/archive/${LZ4_VERSION}.tar.gz"
"MEGASEARCH_OPENBLAS_URL openblas-${OPENBLAS_VERSION}.tar.gz https://github.com/xianyi/OpenBLAS/archive/${OPENBLAS_VERSION}.tar.gz"
"MEGASEARCH_PROMETHEUS_URL https://github.com/jupp0r/prometheus-cpp.git"
"MEGASEARCH_ROCKSDB_URL rocksdb-${ROCKSDB_VERSION}.tar.gz https://github.com/facebook/rocksdb/archive/${ROCKSDB_VERSION}.tar.gz"
"MEGASEARCH_SNAPPY_URL snappy-${SNAPPY_VERSION}.tar.gz https://github.com/google/snappy/archive/${SNAPPY_VERSION}.tar.gz"
"MEGASEARCH_SQLITE_URL sqlite-autoconf-${SQLITE_VERSION}.tar.gz https://www.sqlite.org/2019/sqlite-autoconf-${SQLITE_VERSION}.tar.gz"
"MEGASEARCH_SQLITE_ORM_URL sqlite_orm-${SQLITE_ORM_VERSION}.tar.gz https://github.com/fnc12/sqlite_orm/archive/${SQLITE_ORM_VERSION}.tar.gz"
"MEGASEARCH_THRIFT_URL thrift-${THRIFT_VERSION}.tar.gz https://github.com/apache/thrift/archive/${THRIFT_VERSION}.tar.gz"
"MEGASEARCH_YAMLCPP_URL yaml-cpp-${YAMLCPP_VERSION}.tar.gz https://github.com/jbeder/yaml-cpp/archive/yaml-cpp-${YAMLCPP_VERSION}.tar.gz"
"MEGASEARCH_ZLIB_URL zlib-${ZLIB_VERSION}.tar.gz https://github.com/madler/zlib/archive/${ZLIB_VERSION}.tar.gz"
"MEGASEARCH_ZSTD_URL zstd-${ZSTD_VERSION}.tar.gz https://github.com/facebook/zstd/archive/${ZSTD_VERSION}.tar.gz"
)
# vim: set filetype=sh: