From c8727617e51c335090daa48a63c61801c41a83ae Mon Sep 17 00:00:00 2001 From: Ricardo Wurmus Date: Thu, 16 Jun 2022 15:13:29 +0200 Subject: gnu: Add apache-arrow-0.16. * gnu/packages/databases.scm (apache-arrow-0.16): New variable. --- gnu/packages/databases.scm | 126 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 126 insertions(+) diff --git a/gnu/packages/databases.scm b/gnu/packages/databases.scm index 9a142da7ee..03ea55c6e1 100644 --- a/gnu/packages/databases.scm +++ b/gnu/packages/databases.scm @@ -4258,6 +4258,132 @@ language-bindings for structure manipulation. It also provides IPC and common algorithm implementations.") (license license:asl2.0))) +(define-public apache-arrow-0.16 + (package + (name "apache-arrow") + (version "0.16.0") + (source + (origin + (method git-fetch) + (uri (git-reference + (url "https://github.com/apache/arrow") + (commit (string-append "apache-arrow-" version)))) + (file-name (git-file-name name version)) + (sha256 + (base32 + "15bplqy5708bxy1mynzjkd3d2g8v2wd36z8l0ap8yyyq54l3gdvy")))) + (build-system cmake-build-system) + (arguments + `(#:tests? #f + #:phases + (modify-phases %standard-phases + (add-before 'configure 'enter-source-directory + (lambda _ + (chdir "cpp") + (substitute* "src/parquet/CMakeLists.txt" + ((" parquet_constants.cpp") "") + (("set\\(THRIFT_OUTPUT_FILES \\$\\{THRIFT_OUTPUT_FILES\\}.*") "") + ((".*\"\\$\\{THRIFT_OUTPUT_DIR\\}/parquet_constants.cpp\"\\).*") "")))) + (add-after 'unpack 'set-env + (lambda _ + (setenv "BOOST_ROOT" (assoc-ref %build-inputs "boost")) + (setenv "BROTLI_HOME" (assoc-ref %build-inputs "brotli")) + (setenv "FLATBUFFERS_HOME" (assoc-ref %build-inputs "flatbuffers")) + (setenv "RAPIDJSON_HOME" (assoc-ref %build-inputs "rapidjson"))))) + #:build-type "Release" + #:configure-flags + (list "-DARROW_PYTHON=ON" + "-DARROW_GLOG=ON" + "-DARROW_SSE42=OFF" + "-DARROW_BOOST_USE_SHARED=ON" + ;; Parquet options + "-DARROW_PARQUET=ON" + + ;; The maintainers disallow using system versions of + ;; jemalloc: + ;; https://issues.apache.org/jira/browse/ARROW-3507. This + ;; is unfortunate because jemalloc increases performance: + ;; https://arrow.apache.org/blog/2018/07/20/jemalloc/. + "-DARROW_JEMALLOC=OFF" + + ;; The CMake option ARROW_DEPENDENCY_SOURCE is a global + ;; option that instructs the build system how to resolve + ;; each dependency. SYSTEM = Finding the dependency in + ;; system paths using CMake's built-in find_package + ;; function, or using pkg-config for packages that do not + ;; have this feature + "-DARROW_DEPENDENCY_SOURCE=SYSTEM" + + ;; Split output into its component packages. + (string-append "-DCMAKE_INSTALL_PREFIX=" + (assoc-ref %outputs "out")) + (string-append "-DCMAKE_INSTALL_RPATH=" + (assoc-ref %outputs "out") + "/lib") + (string-append "-DCMAKE_INSTALL_BINDIR=" + (assoc-ref %outputs "out") + "/bin") + (string-append "-DCMAKE_INSTALL_INCLUDEDIR=" + (assoc-ref %outputs "include") + "/share/include") + + + "-DARROW_WITH_SNAPPY=ON" + "-DARROW_WITH_ZLIB=ON" + "-DARROW_WITH_ZSTD=ON" + "-DARROW_WITH_LZ4=ON" + "-DARROW_COMPUTE=ON" + "-DARROW_CSV=ON" + "-DARROW_DATASET=ON" + "-DARROW_FILESYSTEM=ON" + "-DARROW_HDFS=ON" + "-DARROW_JSON=ON" + ;; Arrow Python C++ integration library (required for + ;; building pyarrow). This library must be built against + ;; the same Python version for which you are building + ;; pyarrow. NumPy must also be installed. Enabling this + ;; option also enables ARROW_COMPUTE, ARROW_CSV, + ;; ARROW_DATASET, ARROW_FILESYSTEM, ARROW_HDFS, and + ;; ARROW_JSON. + "-DARROW_PYTHON=ON" + + ;; Building the tests forces on all the + ;; optional features and the use of static + ;; libraries. + "-DARROW_BUILD_TESTS=OFF" + "-DBENCHMARK_ENABLE_GTEST_TESTS=OFF" + ;;"-DBENCHMARK_ENABLE_TESTING=OFF" + "-DARROW_BUILD_STATIC=OFF"))) + (inputs + `(("boost" ,boost) + ("brotli" ,google-brotli) + ("double-conversion" ,double-conversion) + ("snappy" ,snappy) + ("gflags" ,gflags) + ("glog" ,glog) + ("apache-thrift" ,apache-thrift "lib") + ("protobuf" ,protobuf) + ("rapidjson" ,rapidjson) + ("zlib" ,zlib) + ("bzip2" ,bzip2) + ("lz4" ,lz4) + ("zstd" ,zstd "lib") + ("re2" ,re2) + ("grpc" ,grpc) + ("python-3" ,python) + ("python-numpy" ,python-numpy))) + (native-inputs + (list pkg-config apache-thrift)) + (outputs '("out" "include")) + (home-page "https://arrow.apache.org/") + (synopsis "Columnar in-memory analytics") + (description "Apache Arrow is a columnar in-memory analytics layer +designed to accelerate big data. It houses a set of canonical in-memory +representations of flat and hierarchical data along with multiple +language-bindings for structure manipulation. It also provides IPC and common +algorithm implementations.") + (license license:asl2.0))) + (define-public python-pyarrow (package (inherit apache-arrow) -- cgit v1.2.3