Rebase numbuf to latest arrow (#23)

* rebase to latest arrow

* fix arrow linking

* finish rebase

* point arrow to pcmoritz's arrow

* fix

* fix macOS

* fix
This commit is contained in:
Philipp Moritz 2016-11-19 02:27:21 -08:00 committed by Robert Nishihara
parent c3db225cbe
commit c3ab68e88c
15 changed files with 163 additions and 86 deletions

2
.gitmodules vendored
View file

@@ -1,3 +1,3 @@
[submodule "thirdparty/arrow"]
path = thirdparty/arrow
url = https://github.com/ray-project/arrow.git
url = https://github.com/pcmoritz/arrow.git

View file

@@ -4,6 +4,10 @@ project(numbuf)
list(APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake/Modules)
# Make libnumbuf.so look for shared libraries in the folder libnumbuf.so is in
set(CMAKE_INSTALL_RPATH "$ORIGIN/")
set(CMAKE_MACOSX_RPATH 1)
if(NOT APPLE)
find_package(PythonInterp REQUIRED)
find_package(PythonLibs REQUIRED)
@@ -53,8 +57,25 @@ endif()
set(ARROW_DIR "${CMAKE_SOURCE_DIR}/thirdparty/arrow/" CACHE STRING
"Path of the arrow source directory")
set(ARROW_STATIC_LIB "${CMAKE_SOURCE_DIR}/thirdparty/arrow/cpp/build/release/libarrow.a" CACHE STRING
"Path to libarrow.a (needs to be changed if arrow is build in debug mode)")
if (APPLE)
set(ARROW_LIB "${CMAKE_SOURCE_DIR}/thirdparty/arrow/cpp/build/release/libarrow.dylib" CACHE STRING
"Path to libarrow.dylib (needs to be changed if arrow is build in debug mode)")
set(ARROW_IO_LIB "${CMAKE_SOURCE_DIR}/thirdparty/arrow/cpp/build/release/libarrow_io.dylib" CACHE STRING
"Path to libarrow_io.dylib (needs to be changed if arrow is build in debug mode)")
set(ARROW_IPC_LIB "${CMAKE_SOURCE_DIR}/thirdparty/arrow/cpp/build/release/libarrow_ipc.dylib" CACHE STRING
"Path to libarrow_ipc.dylib (needs to be changed if arrow is build in debug mode)")
else()
set(ARROW_LIB "${CMAKE_SOURCE_DIR}/thirdparty/arrow/cpp/build/release/libarrow.so" CACHE STRING
"Path to libarrow.so (needs to be changed if arrow is build in debug mode)")
set(ARROW_IO_LIB "${CMAKE_SOURCE_DIR}/thirdparty/arrow/cpp/build/release/libarrow_io.so" CACHE STRING
"Path to libarrow_io.so (needs to be changed if arrow is build in debug mode)")
set(ARROW_IPC_LIB "${CMAKE_SOURCE_DIR}/thirdparty/arrow/cpp/build/release/libarrow_ipc.so" CACHE STRING
"Path to libarrow_ipc.so (needs to be changed if arrow is build in debug mode)")
endif()
include_directories("${ARROW_DIR}/cpp/src/")
include_directories("cpp/src/")
@@ -75,8 +96,18 @@ if(APPLE)
add_custom_command(TARGET numbuf
POST_BUILD COMMAND
${CMAKE_INSTALL_NAME_TOOL} -change ${PYTHON_SHARED_LIBRARY} ${PYTHON_LIBRARIES} libnumbuf.so)
add_custom_command(TARGET numbuf
POST_BUILD COMMAND
${CMAKE_INSTALL_NAME_TOOL} -change "@rpath/libarrow.dylib" "@loader_path/libarrow.dylib" libnumbuf.so)
add_custom_command(TARGET numbuf
POST_BUILD COMMAND
${CMAKE_INSTALL_NAME_TOOL} -change "@rpath/libarrow_io.dylib" "@loader_path/libarrow_io.dylib" libnumbuf.so)
add_custom_command(TARGET numbuf
POST_BUILD COMMAND
${CMAKE_INSTALL_NAME_TOOL} -change "@rpath/libarrow_ipc.dylib" "@loader_path/libarrow_ipc.dylib" libnumbuf.so)
endif(APPLE)
target_link_libraries(numbuf ${ARROW_STATIC_LIB} ${PYTHON_LIBRARIES})
target_link_libraries(numbuf ${ARROW_LIB} ${ARROW_IO_LIB} ${ARROW_IPC_LIB} ${PYTHON_LIBRARIES})
install(TARGETS numbuf DESTINATION ${CMAKE_SOURCE_DIR})
install(TARGETS numbuf DESTINATION ${CMAKE_SOURCE_DIR}/numbuf/)
install(FILES ${ARROW_LIB} ${ARROW_IO_LIB} ${ARROW_IPC_LIB} DESTINATION ${CMAKE_SOURCE_DIR}/numbuf/)

View file

@@ -15,6 +15,6 @@ fi
mkdir -p "$ROOT_DIR/build"
pushd "$ROOT_DIR/build"
cmake -DCMAKE_BUILD_TYPE=Release ..
cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_C_FLAGS="-g" -DCMAKE_CXX_FLAGS="-g" ..
make install -j$PARALLEL
popd

View file

@@ -4,21 +4,24 @@ using namespace arrow;
namespace numbuf {
std::shared_ptr<arrow::StructArray> DictBuilder::Finish(
Status DictBuilder::Finish(
std::shared_ptr<Array> key_tuple_data,
std::shared_ptr<Array> val_list_data,
std::shared_ptr<Array> val_tuple_data,
std::shared_ptr<Array> val_dict_data) {
std::shared_ptr<Array> val_dict_data,
std::shared_ptr<arrow::Array>* out) {
// lists and dicts can't be keys of dicts in Python, that is why for
// the keys we do not need to collect sublists
auto keys = keys_.Finish(nullptr, key_tuple_data, nullptr);
auto vals = vals_.Finish(val_list_data, val_tuple_data, val_dict_data);
std::shared_ptr<Array> keys, vals;
RETURN_NOT_OK(keys_.Finish(nullptr, key_tuple_data, nullptr, &keys));
RETURN_NOT_OK(vals_.Finish(val_list_data, val_tuple_data, val_dict_data, &vals));
auto keys_field = std::make_shared<Field>("keys", keys->type());
auto vals_field = std::make_shared<Field>("vals", vals->type());
auto type = std::make_shared<StructType>(std::vector<FieldPtr>({keys_field, vals_field}));
std::vector<ArrayPtr> field_arrays({keys, vals});
DCHECK(keys->length() == vals->length());
return std::make_shared<StructArray>(type, keys->length(), field_arrays);
out->reset(new StructArray(type, keys->length(), field_arrays));
return Status::OK();
}
}

View file

@@ -33,11 +33,12 @@ public:
List containing the data from nested dictionaries in the
value list of the dictionary
*/
std::shared_ptr<arrow::StructArray> Finish(
arrow::Status Finish(
std::shared_ptr<arrow::Array> key_tuple_data,
std::shared_ptr<arrow::Array> val_list_data,
std::shared_ptr<arrow::Array> val_tuple_data,
std::shared_ptr<arrow::Array> val_dict_data);
std::shared_ptr<arrow::Array> val_dict_data,
std::shared_ptr<arrow::Array>* out);
private:
SequenceBuilder keys_;

View file

@@ -5,13 +5,16 @@ using namespace arrow;
namespace numbuf {
SequenceBuilder::SequenceBuilder(MemoryPool* pool)
: pool_(pool), types_(pool), offsets_(pool),
: pool_(pool),
types_(pool, std::make_shared<Int8Type>()),
offsets_(pool, std::make_shared<Int32Type>()),
nones_(pool, std::make_shared<NullType>()),
bools_(pool, std::make_shared<BooleanType>()),
ints_(pool),
ints_(pool, std::make_shared<Int64Type>()),
bytes_(pool, std::make_shared<BinaryType>()),
strings_(pool, std::make_shared<StringType>()),
floats_(pool), doubles_(pool),
floats_(pool, std::make_shared<FloatType>()),
doubles_(pool, std::make_shared<DoubleType>()),
uint8_tensors_(std::make_shared<UInt8Type>(), pool),
int8_tensors_(std::make_shared<Int8Type>(), pool),
uint16_tensors_(std::make_shared<UInt16Type>(), pool),
@@ -76,6 +79,9 @@ Status SequenceBuilder::AppendDouble(double data) {
#define DEF_TENSOR_APPEND(NAME, TYPE, TAG) \
Status SequenceBuilder::AppendTensor(const std::vector<int64_t>& dims, TYPE* data) { \
if (TAG == -1) { \
NAME.Start(); \
} \
UPDATE(NAME.length(), TAG); \
return NAME.Append(dims, data); \
}
@@ -109,11 +115,11 @@ Status SequenceBuilder::AppendDict(int32_t size) {
return Status::OK();
}
#define ADD_ELEMENT(VARNAME, TAG) \
if (TAG != -1) { \
types[TAG] = VARNAME.type(); \
children[TAG] = VARNAME.Finish(); \
ARROW_CHECK_OK(nones_.AppendToBitmap(true)); \
#define ADD_ELEMENT(VARNAME, TAG) \
if (TAG != -1) { \
types[TAG] = std::make_shared<Field>("", VARNAME.type()); \
RETURN_NOT_OK(VARNAME.Finish(&children[TAG])); \
RETURN_NOT_OK(nones_.AppendToBitmap(true)); \
}
#define ADD_SUBSEQUENCE(DATA, OFFSETS, BUILDER, TAG, NAME) \
@@ -132,12 +138,13 @@ Status SequenceBuilder::AppendDict(int32_t size) {
DCHECK(OFFSETS.size() == 1); \
}
std::shared_ptr<DenseUnionArray> SequenceBuilder::Finish(
Status SequenceBuilder::Finish(
std::shared_ptr<Array> list_data,
std::shared_ptr<Array> tuple_data,
std::shared_ptr<Array> dict_data) {
std::shared_ptr<Array> dict_data,
std::shared_ptr<Array>* out) {
std::vector<TypePtr> types(num_tags);
std::vector<std::shared_ptr<Field>> types(num_tags);
std::vector<ArrayPtr> children(num_tags);
ADD_ELEMENT(bools_, bool_tag);
@@ -165,11 +172,12 @@ std::shared_ptr<DenseUnionArray> SequenceBuilder::Finish(
ADD_SUBSEQUENCE(tuple_data, tuple_offsets_, tuple_builder, tuple_tag, "tuple");
ADD_SUBSEQUENCE(dict_data, dict_offsets_, dict_builder, dict_tag, "dict");
TypePtr type = TypePtr(new DenseUnionType(types));
return std::make_shared<DenseUnionArray>(type, types_.length(),
std::vector<uint8_t> type_ids = {};
TypePtr type = TypePtr(new UnionType(types, type_ids, UnionMode::DENSE));
out->reset(new UnionArray(type, types_.length(),
children, types_.data(), offsets_.data(),
nones_.null_count(), nones_.null_bitmap());
nones_.null_count(), nones_.null_bitmap()));
return Status::OK();
}
}

View file

@@ -79,10 +79,11 @@ class SequenceBuilder {
arrow::Status AppendDict(int32_t size);
//! Finish building the sequence and return the result
std::shared_ptr<arrow::DenseUnionArray> Finish(
arrow::Status Finish(
std::shared_ptr<arrow::Array> list_data,
std::shared_ptr<arrow::Array> tuple_data,
std::shared_ptr<arrow::Array> dict_data);
std::shared_ptr<arrow::Array> dict_data,
std::shared_ptr<arrow::Array>* out);
private:
arrow::MemoryPool* pool_;

View file

@@ -6,19 +6,24 @@ namespace numbuf {
template<typename T>
TensorBuilder<T>::TensorBuilder(const TypePtr& dtype, MemoryPool* pool)
: dtype_(dtype) {
dim_data_ = std::make_shared<Int64Builder>(pool);
dims_ = std::make_shared<ListBuilder>(pool, dim_data_);
value_data_ = std::make_shared<PrimitiveBuilder<T>>(pool, dtype);
values_ = std::make_shared<ListBuilder>(pool, value_data_);
: dtype_(dtype), pool_(pool) {}
template<typename T>
Status TensorBuilder<T>::Start() {
dim_data_ = std::make_shared<Int64Builder>(pool_, std::make_shared<Int64Type>());
dims_ = std::make_shared<ListBuilder>(pool_, dim_data_);
value_data_ = std::make_shared<PrimitiveBuilder<T>>(pool_, dtype_);
values_ = std::make_shared<ListBuilder>(pool_, value_data_);
auto dims_field = std::make_shared<Field>("dims", dims_->type());
auto values_field = std::make_shared<Field>("data", values_->type());
auto type = std::make_shared<StructType>(std::vector<FieldPtr>({dims_field, values_field}));
tensors_ = std::make_shared<StructBuilder>(pool, type, std::vector<std::shared_ptr<ArrayBuilder>>({dims_, values_}));
};
tensors_ = std::make_shared<StructBuilder>(pool_, type, std::vector<std::shared_ptr<ArrayBuilder>>({dims_, values_}));
return Status::OK();
}
template<typename T>
Status TensorBuilder<T>::Append(const std::vector<int64_t>& dims, const elem_type* data) {
DCHECK(tensors_);
RETURN_NOT_OK(tensors_->Append());
RETURN_NOT_OK(dims_->Append());
RETURN_NOT_OK(values_->Append());
@@ -32,8 +37,8 @@ Status TensorBuilder<T>::Append(const std::vector<int64_t>& dims, const elem_typ
}
template<typename T>
std::shared_ptr<Array> TensorBuilder<T>::Finish() {
return tensors_->Finish();
Status TensorBuilder<T>::Finish(std::shared_ptr<Array>* out) {
return tensors_->Finish(out);
}
template class TensorBuilder<UInt8Type>;

View file

@@ -18,6 +18,8 @@ public:
typedef typename T::c_type elem_type;
TensorBuilder(const arrow::TypePtr& dtype, arrow::MemoryPool* pool = nullptr);
arrow::Status Start();
/*! Append a new tensor.
@@ -31,7 +33,7 @@ public:
arrow::Status Append(const std::vector<int64_t>& dims, const elem_type* data);
//! Convert the tensors to an Arrow StructArray
std::shared_ptr<arrow::Array> Finish();
arrow::Status Finish(std::shared_ptr<arrow::Array>* out);
//! Number of tensors in the column
int32_t length() {
@@ -44,6 +46,7 @@ public:
private:
arrow::TypePtr dtype_;
arrow::MemoryPool* pool_;
std::shared_ptr<arrow::Int64Builder> dim_data_;
std::shared_ptr<arrow::ListBuilder> dims_;
std::shared_ptr<arrow::PrimitiveBuilder<T>> value_data_;

View file

@@ -166,12 +166,11 @@ Status SerializeSequences(std::vector<PyObject*> sequences, int32_t recursion_de
if (subdicts.size() > 0) {
RETURN_NOT_OK(SerializeDict(subdicts, recursion_depth + 1, &dict));
}
*out = builder.Finish(list, tuple, dict);
return Status::OK();
return builder.Finish(list, tuple, dict, out);
}
#define DESERIALIZE_SEQUENCE(CREATE, SET_ITEM) \
auto data = std::dynamic_pointer_cast<DenseUnionArray>(array); \
auto data = std::dynamic_pointer_cast<UnionArray>(array); \
int32_t size = array->length(); \
PyObject* result = CREATE(stop_idx - start_idx); \
auto types = std::make_shared<Int8Array>(size, data->types()); \
@@ -231,7 +230,7 @@ Status SerializeDict(std::vector<PyObject*> dicts, int32_t recursion_depth, std:
if (val_dicts.size() > 0) {
RETURN_NOT_OK(SerializeDict(val_dicts, recursion_depth + 1, &val_dict_arr));
}
*out = result.Finish(key_tuples_arr, val_list_arr, val_tuples_arr, val_dict_arr);
result.Finish(key_tuples_arr, val_list_arr, val_tuples_arr, val_dict_arr, out);
// This block is used to decrement the reference counts of the results
// returned by the serialization callback, which is called in SerializeArray

View file

@@ -1,22 +1,32 @@
#ifndef PYNUMBUF_MEMORY_H
#define PYNUMBUF_MEMORY_H
#include <arrow/ipc/memory.h>
#include <arrow/io/interfaces.h>
namespace numbuf {
class BufferSource : public arrow::ipc::MemorySource {
class FixedBufferStream : public arrow::io::OutputStream, public arrow::io::ReadableFileInterface {
public:
virtual ~BufferSource() {}
virtual ~FixedBufferStream() {}
explicit BufferSource(uint8_t* data, int64_t nbytes)
: data_(data), size_(nbytes) {}
explicit FixedBufferStream(uint8_t* data, int64_t nbytes)
: data_(data), position_(0), size_(nbytes) {}
arrow::Status ReadAt(int64_t position, int64_t nbytes,
std::shared_ptr<arrow::Buffer>* out) override {
arrow::Status Read(int64_t nbytes, std::shared_ptr<arrow::Buffer>* out) override {
DCHECK(out);
DCHECK(position + nbytes <= size_) << "position: " << position << " nbytes: " << nbytes << "size: " << size_;
*out = std::make_shared<arrow::Buffer>(data_ + position, nbytes);
DCHECK(position_ + nbytes <= size_) << "position: " << position_ << " nbytes: " << nbytes << "size: " << size_;
*out = std::make_shared<arrow::Buffer>(data_ + position_, nbytes);
position_ += nbytes;
return arrow::Status::OK();
}
arrow::Status Read(int64_t nbytes, int64_t* bytes_read, uint8_t* out) {
assert(0);
return arrow::Status::OK();
}
arrow::Status Seek(int64_t position) override {
position_ = position;
return arrow::Status::OK();
}
@@ -24,24 +34,35 @@ class BufferSource : public arrow::ipc::MemorySource {
return arrow::Status::OK();
}
arrow::Status Write(int64_t position, const uint8_t* data,
int64_t nbytes) override {
DCHECK(position >= 0 && position < size_);
DCHECK(position + nbytes <= size_) << "position: " << position << " nbytes: " << nbytes << "size: " << size_;
uint8_t* dst = data_ + position;
memcpy(dst, data, nbytes);
arrow::Status Tell(int64_t* position) override {
*position = position_;
return arrow::Status::OK();
}
int64_t Size() const override {
return size_;
arrow::Status Write(const uint8_t* data, int64_t nbytes) override {
DCHECK(position_ >= 0 && position_ < size_);
DCHECK(position_ + nbytes <= size_) << "position: " << position_ << " nbytes: " << nbytes << "size: " << size_;
uint8_t* dst = data_ + position_;
memcpy(dst, data, nbytes);
position_ += nbytes;
return arrow::Status::OK();
}
arrow::Status GetSize(int64_t *size) override {
*size = size_;
return arrow::Status::OK();
}
bool supports_zero_copy() const override {
return true;
}
private:
uint8_t* data_;
int64_t position_;
int64_t size_;
};
} // namespace
} // namespace numbuf
#endif // PYNUMBUF_MEMORY_H

View file

@@ -1,6 +1,5 @@
#include <Python.h>
#include <arrow/api.h>
#include <arrow/ipc/memory.h>
#include <arrow/ipc/adapter.h>
#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
#define PY_ARRAY_UNIQUE_SYMBOL NUMBUF_ARRAY_API
@@ -16,10 +15,10 @@
using namespace arrow;
using namespace numbuf;
std::shared_ptr<RowBatch> make_row_batch(std::shared_ptr<Array> data) {
std::shared_ptr<RecordBatch> make_row_batch(std::shared_ptr<Array> data) {
auto field = std::make_shared<Field>("list", data->type());
std::shared_ptr<Schema> schema(new Schema({field}));
return std::shared_ptr<RowBatch>(new RowBatch(schema, data->length(), {data}));
return std::shared_ptr<RecordBatch>(new RecordBatch(schema, data->length(), {data}));
}
extern "C" {
@@ -29,9 +28,9 @@ static PyObject *NumbufError;
PyObject *numbuf_serialize_callback = NULL;
PyObject *numbuf_deserialize_callback = NULL;
int PyObjectToArrow(PyObject* object, std::shared_ptr<RowBatch> **result) {
int PyObjectToArrow(PyObject* object, std::shared_ptr<RecordBatch> **result) {
if (PyCapsule_IsValid(object, "arrow")) {
*result = reinterpret_cast<std::shared_ptr<RowBatch>*>(PyCapsule_GetPointer(object, "arrow"));
*result = reinterpret_cast<std::shared_ptr<RecordBatch>*>(PyCapsule_GetPointer(object, "arrow"));
return 1;
} else {
PyErr_SetString(PyExc_TypeError, "must be an 'arrow' capsule");
@@ -40,7 +39,7 @@ int PyObjectToArrow(PyObject* object, std::shared_ptr<RowBatch> **result) {
}
static void ArrowCapsule_Destructor(PyObject* capsule) {
delete reinterpret_cast<std::shared_ptr<RowBatch>*>(PyCapsule_GetPointer(capsule, "arrow"));
delete reinterpret_cast<std::shared_ptr<RecordBatch>*>(PyCapsule_GetPointer(capsule, "arrow"));
}
/* Documented in doc/numbuf.rst in ray-core */
@@ -62,11 +61,11 @@ static PyObject* serialize_list(PyObject* self, PyObject* args) {
return NULL;
}
auto batch = new std::shared_ptr<RowBatch>();
auto batch = new std::shared_ptr<RecordBatch>();
*batch = make_row_batch(array);
int64_t size = 0;
ARROW_CHECK_OK(arrow::ipc::GetRowBatchSize(batch->get(), &size));
ARROW_CHECK_OK(arrow::ipc::GetRecordBatchSize(batch->get(), &size));
std::shared_ptr<Buffer> buffer;
ARROW_CHECK_OK(ipc::WriteSchema((*batch)->schema().get(), &buffer));
@@ -84,7 +83,7 @@ static PyObject* serialize_list(PyObject* self, PyObject* args) {
/* Documented in doc/numbuf.rst in ray-core */
static PyObject* write_to_buffer(PyObject* self, PyObject* args) {
std::shared_ptr<RowBatch>* batch;
std::shared_ptr<RecordBatch>* batch;
PyObject* memoryview;
if (!PyArg_ParseTuple(args, "O&O", &PyObjectToArrow, &batch, &memoryview)) {
return NULL;
@@ -93,10 +92,11 @@ static PyObject* write_to_buffer(PyObject* self, PyObject* args) {
return NULL;
}
Py_buffer* buffer = PyMemoryView_GET_BUFFER(memoryview);
auto target = std::make_shared<BufferSource>(reinterpret_cast<uint8_t*>(buffer->buf), buffer->len);
int64_t metadata_offset;
ARROW_CHECK_OK(ipc::WriteRowBatch(target.get(), batch->get(), 0, &metadata_offset));
return PyInt_FromLong(metadata_offset);
auto target = std::make_shared<FixedBufferStream>(reinterpret_cast<uint8_t*>(buffer->buf), buffer->len);
int64_t body_end_offset;
int64_t header_end_offset;
ARROW_CHECK_OK(ipc::WriteRecordBatch((*batch)->columns(), (*batch)->num_rows(), target.get(), &body_end_offset, &header_end_offset));
return PyInt_FromLong(header_end_offset);
}
/* Documented in doc/numbuf.rst in ray-core */
@@ -118,11 +118,11 @@ static PyObject* read_from_buffer(PyObject* self, PyObject* args) {
ARROW_CHECK_OK(schema_msg->GetSchema(&schema));
Py_buffer* buffer = PyMemoryView_GET_BUFFER(memoryview);
auto source = std::make_shared<BufferSource>(reinterpret_cast<uint8_t*>(buffer->buf), buffer->len);
std::shared_ptr<arrow::ipc::RowBatchReader> reader;
ARROW_CHECK_OK(arrow::ipc::RowBatchReader::Open(source.get(), metadata_offset, &reader));
auto batch = new std::shared_ptr<arrow::RowBatch>();
ARROW_CHECK_OK(reader->GetRowBatch(schema, batch));
auto source = std::make_shared<FixedBufferStream>(reinterpret_cast<uint8_t*>(buffer->buf), buffer->len);
std::shared_ptr<arrow::ipc::RecordBatchReader> reader;
ARROW_CHECK_OK(arrow::ipc::RecordBatchReader::Open(source.get(), metadata_offset, &reader));
auto batch = new std::shared_ptr<arrow::RecordBatch>();
ARROW_CHECK_OK(reader->GetRecordBatch(schema, batch));
return PyCapsule_New(reinterpret_cast<void*>(batch),
"arrow", &ArrowCapsule_Destructor);
@@ -130,7 +130,7 @@ static PyObject* read_from_buffer(PyObject* self, PyObject* args) {
/* Documented in doc/numbuf.rst in ray-core */
static PyObject* deserialize_list(PyObject* self, PyObject* args) {
std::shared_ptr<RowBatch>* data;
std::shared_ptr<RecordBatch>* data;
PyObject* base = Py_None;
if (!PyArg_ParseTuple(args, "O&|O", &PyObjectToArrow, &data, &base)) {
return NULL;

View file

@@ -1,6 +1,13 @@
import subprocess
from setuptools import setup, find_packages, Extension
import setuptools.command.install as _install
from sys import platform
extension = ""
if platform == "linux" or platform == "linux2":
extension = ".so"
elif platform == "darwin":
extension = ".dylib"
# Because of relative paths, this must be run from inside numbuf/.
@@ -8,7 +15,6 @@ class install(_install.install):
def run(self):
subprocess.check_call(["./setup.sh"])
subprocess.check_call(["./build.sh"])
subprocess.check_call(["cp", "libnumbuf.so", "numbuf/"])
# Calling _install.install.run(self) does not fetch required packages and
# instead performs an old-style install. See command/install.py in
# setuptools. So, calling do_egg_install() manually here.
@@ -17,7 +23,7 @@ class install(_install.install):
setup(name="numbuf",
version="0.0.1",
packages=find_packages(),
package_data={"numbuf": ["libnumbuf.so"]},
package_data={"numbuf": ["libnumbuf.so", "libarrow" + extension, "libarrow_io" + extension, "libarrow_ipc" + extension]},
cmdclass={"install": install},
setup_requires=["numpy"],
include_package_data=True,

2
thirdparty/arrow vendored

@@ -1 +1 @@
Subproject commit 1407126ec0956b6dc6f9873bdeeef4d042ca032f
Subproject commit 58bd7bedc63d66d5898297bab25b54dfb67665db

View file

@@ -20,8 +20,7 @@ fi
echo "building arrow"
cd $TP_DIR/arrow/cpp
source setup_build_env.sh
mkdir -p $TP_DIR/arrow/cpp/build
cd $TP_DIR/arrow/cpp/build
cmake -DLIBARROW_LINKAGE=STATIC -DCMAKE_BUILD_TYPE=Release ..
cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_C_FLAGS="-g" -DCMAKE_CXX_FLAGS="-g" ..
make VERBOSE=1 -j$PARALLEL