Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions build-sqlite
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
#!/bin/bash
set -euo pipefail

VERSION=3240000
VERSION=3352000

fetch_if_needed() {
if [ ! -e sqlite ]; then
curl --fail "https://sqlite.org/2018/sqlite-autoconf-${VERSION}.tar.gz" > sqlite.tar.gz
curl --fail "https://sqlite.org/2020/sqlite-autoconf-${VERSION}.tar.gz" > sqlite.tar.gz
tar xf sqlite.tar.gz
rm sqlite.tar.gz
mv sqlite-autoconf-${VERSION} sqlite
Expand Down
102 changes: 11 additions & 91 deletions build/Makefile.linux
Original file line number Diff line number Diff line change
Expand Up @@ -3,119 +3,39 @@ ROOT:=$(HERE)/../..
VTABLE:=$(ROOT)/parquet
SQLITE:=$(ROOT)/sqlite

# Directories
ARROW=$(HERE)/arrow
ARROW_RELEASE=$(ARROW)/cpp/release
BOOST_ROOT=$(ARROW_RELEASE)/boost_ep-prefix/src/boost_ep
BOOST=$(BOOST_ROOT)/stage/lib
BROTLI=$(ARROW_RELEASE)/brotli_ep/src/brotli_ep-install/lib/x86_64-linux-gnu
ICU=$(HERE)/icu
LZ4=$(ARROW_RELEASE)/lz4_ep-prefix/src/lz4_ep/lib
PARQUET_CPP=$(HERE)/parquet-cpp
SNAPPY=$(ARROW_RELEASE)/snappy_ep/src/snappy_ep-install/lib
ZLIB=$(ARROW_RELEASE)/zlib_ep/src/zlib_ep-install/lib
ZSTD=$(ARROW_RELEASE)/zstd_ep-prefix/src/zstd_ep/lib

# Libraries
# profile_gen, profile_build for PGO
APACHE_BUILD=release

ARROW_LIB = $(ARROW_RELEASE)/$(APACHE_BUILD)/libarrow.a
BOOST_FILESYSTEM_LIB = $(BOOST)/libboost_filesystem.a
BOOST_REGEX_LIB = $(BOOST)/libboost_regex.a
BOOST_SYSTEM_LIB = $(BOOST)/libboost_system.a
BROTLI_COMMON_LIB = $(BROTLI)/libbrotlicommon.a
BROTLI_DEC_LIB = $(BROTLI)/libbrotlidec.a
BROTLI_ENC_LIB = $(BROTLI)/libbrotlienc.a
ICU_I18N_LIB=$(ICU)/source/lib/libicui18n.a
ICU_UC_LIB=$(ICU)/source/lib/libicuuc.a
ICU_DATA_LIB=$(ICU)/source/lib/libicudata.a
LZ4_LIB = $(LZ4)/liblz4.a
PARQUET_CPP_LIB = $(PARQUET_CPP)/build/$(APACHE_BUILD)/libparquet.a
SNAPPY_LIB = $(SNAPPY)/libsnappy.a
THRIFT_LIB = $(PARQUET_CPP)/thrift_ep/src/thrift_ep-install/lib/libthrift.a
ZLIB_LIB = $(ZLIB)/libz.a
ZSTD_LIB = $(ZSTD)/libzstd.a

# Flags
CC = gcc
CXX = g++
OPTIMIZATIONS = -O3
CPUS:=$(shell nproc)
CFLAGS = -I $(SQLITE) -I $(PARQUET_CPP)/src -I $(ARROW)/cpp/src $(OPTIMIZATIONS) -std=c++11 -Wall -fPIC -g
CFLAGS = -I $(SQLITE) $(OPTIMIZATIONS) -std=c++11 -Wall -fPIC -g
LIBS = -lparquet -lboost_regex -lboost_system -lboost_filesystem \
-lbrotlienc -lbrotlicommon -lbrotlidec -licui18n -licuuc -licudata \
-llz4 -lsnappy -lthrift -lz -lzstd -lcrypto -lssl

ALL_LIBS = $(PARQUET_CPP_LIB) $(LZ4_LIB) $(ZSTD_LIB) $(THRIFT_LIB) $(SNAPPY_LIB) $(ARROW_LIB) \
$(ICU_I18N_LIB) $(ICU_UC_LIB) $(ICU_DATA_LIB) \
$(BROTLI_ENC_LIB) $(BROTLI_COMMON_LIB) $(BROTLI_DEC_LIB) $(BOOST_REGEX_LIB) $(BOOST_SYSTEM_LIB) $(BOOST_FILESYSTEM_LIB)
LDFLAGS = $(OPTIMIZATIONS) -Wl,--no-whole-archive $(LIBS) -lz -lcrypto -lssl

LDFLAGS = $(OPTIMIZATIONS) \
-Wl,--whole-archive $(ALL_LIBS) \
-Wl,--no-whole-archive -lz -lcrypto -lssl
OBJ = parquet.o parquet_filter.o parquet_table.o parquet_cursor.o
LIBS = $(ARROW_LIB) $(PARQUET_CPP_LIB) $(ICU_I18N_LIB)

PROF =

libparquet.so: $(LIBS) $(OBJ)
libparquet.so: $(OBJ)
$(CXX) $(PROF) -shared -o $@ $(OBJ) $(LDFLAGS)

parquet_filter.o: $(VTABLE)/parquet_filter.cc $(VTABLE)/parquet_filter.h $(ARROW) $(PARQUET_CPP)
parquet_filter.o: $(VTABLE)/parquet_filter.cc $(VTABLE)/parquet_filter.h
$(CXX) $(PROF) -c -o $@ $< $(CFLAGS)

parquet_cursor.o: $(VTABLE)/parquet_cursor.cc $(VTABLE)/parquet_cursor.h $(VTABLE)/parquet_table.h $(VTABLE)/parquet_filter.h $(ARROW) $(PARQUET_CPP)
parquet_cursor.o: $(VTABLE)/parquet_cursor.cc $(VTABLE)/parquet_cursor.h $(VTABLE)/parquet_table.h $(VTABLE)/parquet_filter.h
$(CXX) $(PROF) -c -o $@ $< $(CFLAGS)

parquet_table.o: $(VTABLE)/parquet_table.cc $(VTABLE)/parquet_table.h $(ARROW) $(PARQUET_CPP)
parquet_table.o: $(VTABLE)/parquet_table.cc $(VTABLE)/parquet_table.h
$(CXX) $(PROF) -c -o $@ $< $(CFLAGS)

parquet.o: $(VTABLE)/parquet.cc $(VTABLE)/parquet_cursor.h $(VTABLE)/parquet_table.h $(VTABLE)/parquet_filter.h $(ARROW) $(PARQUET_CPP)
parquet.o: $(VTABLE)/parquet.cc $(VTABLE)/parquet_cursor.h $(VTABLE)/parquet_table.h $(VTABLE)/parquet_filter.h
$(CXX) $(PROF) -c -o $@ $< $(CFLAGS)

$(ARROW):
rm -rf $(ARROW)
git clone https://github.com/apache/arrow.git $(ARROW)
cd $(ARROW) && git checkout apache-arrow-0.9.0
mkdir $(ARROW)/cpp/release
cd $(ARROW)/cpp/release && cmake -DCMAKE_BUILD_TYPE=$(APACHE_BUILD) -DARROW_BOOST_VENDORED=ON -DARROW_BOOST_USE_SHARED=OFF -DPARQUET_BUILD_SHARED=OFF ..
touch -d @0 $(ARROW)

$(ARROW_LIB): $(ARROW)
cd $(ARROW)/cpp/release && make -j$(CPUS)

# This is pretty gross. I'm sure someone who knows what they're doing could do this more cleanly.
$(ICU_I18N_LIB):
rm -rf $(ICU)
mkdir $(ICU)
cd $(ICU) && wget https://github.com/unicode-org/icu/releases/download/release-$(ICU_VERSION)/icu4c-$(ICU_VERSION_U)-src.tgz
cd $(ICU) && tar xf icu4c-$(ICU_VERSION_U)-src.tgz --strip-components=1
cd $(ICU)/source && ./configure --enable-static
cd $(ICU)/source && make -j$(CPUS) LIBCFLAGS='-fPIC' LIBCXXFLAGS='-fPIC'

$(PARQUET_CPP):
rm -rf $(PARQUET_CPP)
git clone https://github.com/apache/parquet-cpp.git $(PARQUET_CPP)
cd $(PARQUET_CPP) && git checkout apache-parquet-cpp-1.4.0
cd $(PARQUET_CPP) && BOOST_ROOT=$(BOOST_ROOT) BOOST_STATIC_REGEX_LIBRARY=$(BOOST_REGEX_LIB) SNAPPY_STATIC_LIB=$(SNAPPY_LIB) BROTLI_STATIC_LIB_ENC=$(BROTLI_ENC_LIB) BROTLI_STATIC_LIB_DEC=$(BROTLI_DEC_LIB) BROTLI_STATIC_LIB_COMMON=$(BROTLI_COMMON_LIB) ZLIB_STATIC_LIB=$(ZLIB_LIB) LZ4_STATIC_LIB=$(LZ4_LIB) ZSTD_STATIC_LIB=$(ZSTD_LIB) cmake -DCMAKE_BUILD_TYPE=$(APACHE_BUILD) -DPARQUET_MINIMAL_DEPENDENCY=ON -DPARQUET_ARROW_LINKAGE=static -DPARQUET_BOOST_USE_SHARED=OFF -DPARQUET_BUILD_SHARED=OFF .
touch -d @0 $(PARQUET_CPP)

$(PARQUET_CPP_LIB): $(PARQUET_CPP) $(ARROW_LIB)
cd $(PARQUET_CPP) && make -j$(CPUS)

.PHONY: clean arrow icu parquet publish_libs
.PHONY: clean parquet

clean:
rm -f *.o *.so

distclean:
rm -rf $(SQLITE) $(HERE)


arrow: $(ARROW_LIB)

icu: $(ICU_I18N_LIB)

parquet: $(PARQUET_CPP_LIB)

publish_libs:
tar -cJf libs.tar.xz $(ALL_LIBS) $(SQLITE)/sqlite3
s3cmd put libs.tar.xz s3://cldellow/public/libparquet/$$(lsb_release -s -r)/libs.tar.xz
76 changes: 29 additions & 47 deletions make-linux
Original file line number Diff line number Diff line change
@@ -1,31 +1,46 @@
#!/bin/bash
set -euo pipefail

apt install -y sudo lsb-release wget

here=$(dirname "${BASH_SOURCE[0]}")
here=$(readlink -f "$here")
prebuilt="$here"/build/linux/prebuilt
ubuntu="$(lsb_release -s -r)"
libs=(libarrow.a libboost_filesystem.a libboost_regex.a libboost_system.a libbrotlicommon.a libbrotlidec.a \
libbrotlienc.a libicudata.a libicui18n.a libicuuc.a liblz4.a libparquet.a libsnappy.a libthrift.a libzstd.a)
lib_locs=()
distro="$(lsb_release -s -r)"

setup_directories() {
cd "$here"
mkdir -p build/linux
mkdir -p "$prebuilt"
cp -f build/Makefile.linux build/linux/Makefile
cd build/linux
}

install_prerequisites() {
install_prerequisites_amazon_linux() {
# Install Apache Arrow <https://arrow.apache.org/install/> and dependencies.
sudo yum update -y
sudo yum install -y https://dl.fedoraproject.org/pub/epel/epel-release-latest-7.noarch.rpm
sudo yum install -y https://apache.bintray.com/arrow/centos/7/apache-arrow-release-latest.rpm
sudo yum install -y --enablerepo=epel parquet-devel
sudo yum install -y lz4-devel thrift-devel libzstd-devel snappy-devel brotli-devel boost-devel boost-static libicu-devel openssl-devel
export CFLAGS=-D_GLIBCXX_USE_CXX11_ABI=0
}

install_prerequisites_ubuntu() {
# install Apache Arrow libs
# NOTE: Pinned to Ubuntu Focal
wget https://apache.bintray.com/arrow/ubuntu/apache-arrow-archive-keyring-latest-focal.deb
sudo apt install -y -V ./apache-arrow-archive-keyring-latest-focal.deb
sudo apt update -y
sudo apt install -y -V libparquet-dev liblz4-dev libzstd-dev libthrift-dev \
libsnappy-dev libbrotli-dev libz-dev

# Install prereqs based on https://github.com/apache/parquet-cpp#linux
sudo apt-get install libboost-dev g++ libboost-filesystem-dev \
sudo apt install -y libboost-dev g++ libboost-filesystem-dev \
libboost-program-options-dev libboost-regex-dev \
libboost-system-dev libboost-test-dev \
libssl-dev libtool bison flex pkg-config libreadline-dev libncurses-dev

# Install prereqs based on https://github.com/apache/arrow/tree/master/cpp
sudo apt-get install cmake \
sudo apt install -y cmake \
libboost-dev \
libboost-filesystem-dev \
libboost-system-dev
Expand All @@ -38,7 +53,7 @@ build_sqlite() {
}

set_icu_version() {
case "$ubuntu" in
case "$distro" in
14.04)
export ICU_VERSION=52-1
;;
Expand All @@ -48,6 +63,9 @@ set_icu_version() {
18.04)
export ICU_VERSION=60-2
;;
20.10)
export ICU_VERSION=67-1
;;
*)
echo "unsure what libicu version to use" >&2
exit 1
Expand All @@ -56,47 +74,11 @@ set_icu_version() {
export ICU_VERSION_U=${ICU_VERSION//-/_}
}

add_prebuilt_lib() {
lib_locs+=("$1=$prebuilt/$2.a")
}

fetch_prebuilt_libs() {
if [ ! -e "$prebuilt"/complete ]; then
(
cd "$prebuilt"
curl "https://s3.amazonaws.com/cldellow/public/libparquet/$ubuntu/libs.tar.xz" > libs.tar.xz
tar xf libs.tar.xz --xform 's#.*/##'
touch "$prebuilt"/complete
)
fi

if [ ! -e "$here"/sqlite/sqlite3 ]; then
ln -s "$prebuilt"/sqlite3 "$here"/sqlite/sqlite3
fi

add_prebuilt_lib "PARQUET_CPP_LIB" libparquet
add_prebuilt_lib "LZ4_LIB" liblz4
add_prebuilt_lib "ZSTD_LIB" libzstd
add_prebuilt_lib "THRIFT_LIB" libthrift
add_prebuilt_lib "SNAPPY_LIB" libsnappy
add_prebuilt_lib "ARROW_LIB" libarrow
add_prebuilt_lib "ICU_I18N_LIB" libicui18n
add_prebuilt_lib "ICU_UC_LIB" libicuuc
add_prebuilt_lib "ICU_DATA_LIB" libicudata
add_prebuilt_lib "BROTLI_ENC_LIB" libbrotlienc
add_prebuilt_lib "BROTLI_COMMON_LIB" libbrotlicommon
add_prebuilt_lib "BROTLI_DEC_LIB" libbrotlidec
add_prebuilt_lib "BOOST_REGEX_LIB" libboost_regex
add_prebuilt_lib "BOOST_SYSTEM_LIB" libboost_system
add_prebuilt_lib "BOOST_FILESYSTEM_LIB" libboost_filesystem

}

main() {
set_icu_version
setup_directories
install_prerequisites
build_sqlite
set_icu_version

if [ -v PREBUILT ]; then
fetch_prebuilt_libs
Expand Down
2 changes: 1 addition & 1 deletion parquet/parquet.cc
Original file line number Diff line number Diff line change
Expand Up @@ -290,7 +290,7 @@ static int parquetColumn(
case parquet::Type::BYTE_ARRAY:
{
parquet::ByteArray* rv = cursor->getByteArray(col);
if(cursor->getLogicalType(col) == parquet::LogicalType::UTF8) {
if(cursor->getConvertedType(col) == parquet::ConvertedType::UTF8) {
sqlite3_result_text(ctx, (const char*)rv->ptr, rv->len, SQLITE_TRANSIENT);
} else {
sqlite3_result_blob(ctx, (void*)rv->ptr, rv->len, SQLITE_TRANSIENT);
Expand Down
Loading