diff --git a/cmake/Modules/ArrowExternalProject.cmake b/cmake/Modules/ArrowExternalProject.cmake index 625670f5f..d5b19c786 100644 --- a/cmake/Modules/ArrowExternalProject.cmake +++ b/cmake/Modules/ArrowExternalProject.cmake @@ -14,7 +14,10 @@ # - PLASMA_SHARED_LIB set(arrow_URL https://github.com/apache/arrow.git) -set(arrow_TAG 927bd34aaad875e82beca2584d5d777839fa8bb0) +# The PR for this commit is https://github.com/apache/arrow/pull/2522. We +# include the link here to make it easier to find the right commit because +# Arrow often rewrites git history and invalidates certain commits. +set(arrow_TAG 7104d64ff2cd6c20e29d3cf4ec5c58bc10798f66) set(ARROW_INSTALL_PREFIX ${CMAKE_CURRENT_BINARY_DIR}/external/arrow-install) set(ARROW_HOME ${ARROW_INSTALL_PREFIX}) @@ -53,11 +56,27 @@ set(ARROW_CMAKE_ARGS -DARROW_JEMALLOC=off -DARROW_WITH_BROTLI=off -DARROW_WITH_LZ4=off - -DARROW_WITH_ZLIB=off -DARROW_WITH_ZSTD=off -DFLATBUFFERS_HOME=${FLATBUFFERS_HOME} -DBOOST_ROOT=${BOOST_ROOT}) +if ("${CMAKE_RAY_LANG_PYTHON}" STREQUAL "YES") + # PyArrow needs following settings. + set(ARROW_CMAKE_ARGS ${ARROW_CMAKE_ARGS} + -DARROW_WITH_THRIFT=ON + -DARROW_PARQUET=ON + -DARROW_WITH_ZLIB=ON) +else() + set(ARROW_CMAKE_ARGS ${ARROW_CMAKE_ARGS} + -DARROW_WITH_THRIFT=OFF + -DARROW_PARQUET=OFF + -DARROW_WITH_ZLIB=OFF) +endif () +if (APPLE) + set(ARROW_CMAKE_ARGS ${ARROW_CMAKE_ARGS} + -DBISON_EXECUTABLE=/usr/local/opt/bison/bin/bison) +endif() + if ("${CMAKE_RAY_LANG_JAVA}" STREQUAL "YES") set(ARROW_CMAKE_ARGS ${ARROW_CMAKE_ARGS} -DARROW_PLASMA_JAVA_CLIENT=ON) endif () diff --git a/cmake/Modules/ParquetExternalProject.cmake b/cmake/Modules/ParquetExternalProject.cmake deleted file mode 100644 index 7f5fdfe20..000000000 --- a/cmake/Modules/ParquetExternalProject.cmake +++ /dev/null @@ -1,45 +0,0 @@ -# parquet external project -# target: -# - parquet_ep -# depends: -# - arrow_ep -# defines: -# - PARQUET_HOME -# - PARQUET_INCLUDE_DIR -# - PARQUET_STATIC_LIB -# - PARQUET_SHARED_LIB - -include(ExternalProject) - -set(parquet_URL https://github.com/apache/parquet-cpp.git) -set(parquet_TAG 63f41b00bddecb172bd5b3aa0366b4653f498811) - -# install parquet into arrow_home... -set(PARQUET_INSTALL_PREFIX ${ARROW_HOME}) -set(PARQUET_HOME ${PARQUET_INSTALL_PREFIX}) -set(PARQUET_INCLUDE_DIR ${PARQUET_INSTALL_PREFIX}/include) -set(PARQUET_STATIC_LIB ${PARQUET_INSTALL_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}parquet.a) -set(PARQUET_SHARED_LIB ${PARQUET_INSTALL_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}parquet${CMAKE_SHARED_LIBRARY_SUFFIX}) - -set(parquet_CMAKE_ARGS - -DCMAKE_BUILD_TYPE:STRING=Release - -DCMAKE_INSTALL_PREFIX=${PARQUET_INSTALL_PREFIX} - -DARROW_HOME=${ARROW_HOME} - -DBOOST_ROOT=${BOOST_ROOT} - -DPARQUET_BUILD_BENCHMARKS=off - -DPARQUET_BUILD_EXECUTABLES=off - -DPARQUET_BUILD_TESTS=off) - -set(parquet_ENV "") -if (APPLE) - set(parquet_ENV "PATH=/usr/local/opt/bison/bin:$ENV{PATH}" - "OPENSSL_ROOT_DIR=/usr/local/opt/openssl") -endif () - -ExternalProject_Add(parquet_ep - PREFIX external/parquet - DEPENDS arrow_ep - GIT_REPOSITORY ${parquet_URL} - GIT_TAG ${parquet_TAG} - BUILD_COMMAND ${CMAKE_COMMAND} -E env ${parquet_ENV} $(MAKE) - CMAKE_ARGS ${parquet_CMAKE_ARGS}) diff --git a/cmake/Modules/ThirdpartyToolchain.cmake b/cmake/Modules/ThirdpartyToolchain.cmake index 11d7428ae..bb58b61b1 100644 --- a/cmake/Modules/ThirdpartyToolchain.cmake +++ b/cmake/Modules/ThirdpartyToolchain.cmake @@ -150,19 +150,6 @@ ADD_THIRDPARTY_LIB(plasma STATIC_LIB ${PLASMA_STATIC_LIB}) add_dependencies(plasma plasma_ep) if ("${CMAKE_RAY_LANG_PYTHON}" STREQUAL "YES") - # Apache parquet cpp - include(ParquetExternalProject) - - message(STATUS "Parquet home: ${PARQUET_HOME}") - message(STATUS "Parquet include dir: ${PARQUET_INCLUDE_DIR}") - message(STATUS "Parquet static library: ${PARQUET_STATIC_LIB}") - message(STATUS "Parquet shared library: ${PARQUET_SHARED_LIB}") - include_directories(SYSTEM ${PARQUET_INCLUDE_DIR}) - - ADD_THIRDPARTY_LIB(parquet STATIC_LIB ${PARQUET_STATIC_LIB}) - - add_dependencies(parquet parquet_ep) - # pyarrow find_package(PythonInterp REQUIRED) message(STATUS "PYTHON_EXECUTABLE for pyarrow: ${PYTHON_EXECUTABLE}") @@ -185,7 +172,7 @@ if ("${CMAKE_RAY_LANG_PYTHON}" STREQUAL "YES") # add_custom_command would have problem with setup.py ExternalProject_Add(pyarrow_ext PREFIX external/pyarrow - DEPENDS parquet_ep + DEPENDS arrow_ep DOWNLOAD_COMMAND "" BUILD_IN_SOURCE 1 CONFIGURE_COMMAND cd ${ARROW_SOURCE_DIR}/python && ${CMAKE_COMMAND} -E env ${pyarrow_ENV} ${PYTHON_EXECUTABLE} setup.py build diff --git a/thirdparty/scripts/build_parquet.sh b/thirdparty/scripts/build_parquet.sh deleted file mode 100755 index 8dc718a6b..000000000 --- a/thirdparty/scripts/build_parquet.sh +++ /dev/null @@ -1,75 +0,0 @@ -#!/bin/bash - -set -x - -# Cause the script to exit if a single command fails. -set -e - -unamestr="$(uname)" -TP_DIR=$(cd "$(dirname "${BASH_SOURCE:-$0}")"; pwd)/../ -PARQUET_HOME=$TP_DIR/pkg/arrow/cpp/build/cpp-install -OPENSSL_DIR=/usr/local/opt/openssl -BISON_DIR=/usr/local/opt/bison/bin -TARGET_COMMIT_ID=63f41b00bddecb172bd5b3aa0366b4653f498811 - -build_parquet() { - echo "Building Parquet" - if [ "$unamestr" == "Darwin" ]; then - OPENSSL_ROOT_DIR=$OPENSSL_DIR \ - PATH="$BISON_DIR:$PATH" \ - BOOST_ROOT=$TP_DIR/pkg/boost \ - ARROW_HOME=$TP_DIR/pkg/arrow/cpp/build/cpp-install \ - cmake -DCMAKE_BUILD_TYPE=Release \ - -DCMAKE_INSTALL_PREFIX=$PARQUET_HOME \ - -DPARQUET_BUILD_BENCHMARKS=off \ - -DPARQUET_BUILD_EXECUTABLES=off \ - -DPARQUET_BUILD_TESTS=off \ - . - - OPENSSL_ROOT_DIR=$OPENSSL_DIR \ - PATH="$BISON_DIR:$PATH" \ - make -j4 - - OPENSSL_ROOT_DIR=$OPENSSL_DIR \ - PATH="$BISON_DIR:$PATH" \ - make install - else - BOOST_ROOT=$TP_DIR/pkg/boost \ - ARROW_HOME=$TP_DIR/pkg/arrow/cpp/build/cpp-install \ - cmake -DCMAKE_BUILD_TYPE=Release \ - -DCMAKE_INSTALL_PREFIX=$PARQUET_HOME \ - -DPARQUET_BUILD_BENCHMARKS=off \ - -DPARQUET_BUILD_EXECUTABLES=off \ - -DPARQUET_BUILD_TESTS=off \ - . - - PARQUET_HOME=$TP_DIR/pkg/arrow/cpp/build/cpp-install \ - BOOST_ROOT=$TP_DIR/pkg/boost \ - make -j4 - make install - fi -} - -if [ ! -d $TP_DIR/build/parquet-cpp/.git ]; then - if [[ -d $TP_DIR/build/parquet-cpp ]]; then - rm -rf $TP_DIR/build/parquet-cpp - fi - git clone -q https://github.com/apache/parquet-cpp.git "$TP_DIR/build/parquet-cpp" - pushd $TP_DIR/build/parquet-cpp - git fetch origin master - git checkout $TARGET_COMMIT_ID - - build_parquet - popd -else - pushd $TP_DIR/build/parquet-cpp - if [[ "$TARGET_COMMIT_ID" != `git rev-parse HEAD` ]]; then - # TARGET_COMMIT_ID may change to later commit. - echo "Commit ID mismatches." - git fetch origin master - git checkout $TARGET_COMMIT_ID - build_parquet - fi - - popd -fi diff --git a/thirdparty/scripts/setup.sh b/thirdparty/scripts/setup.sh index 417ba2609..27f1ef0e3 100755 --- a/thirdparty/scripts/setup.sh +++ b/thirdparty/scripts/setup.sh @@ -57,11 +57,6 @@ bash "$TP_SCRIPT_DIR/build_credis.sh" #RAY_BUILD_JAVA=$RAY_BUILD_JAVA \ #bash "$TP_SCRIPT_DIR/build_arrow.sh" $PYTHON_EXECUTABLE -############################################## -# parquet (skipped as it is inlined in build_arrow.sh) -############################################## -# bash "$TP_SCRIPT_DIR/build_parquet.sh" - ############################################## # catapult ##############################################