Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ Once the image has been built or you have downloaded the correct image, you can

In the following instructions, the variable `${DELTA_PACKAGE_VERSION}` refers to the Delta Lake Package version.

The current version is `delta-spark_2.13:4.0.0` which corresponds to Apache Spark 4.x release line.
For Spark 4.x, use Spark-version-specific Delta artifacts: `delta-spark_4.1_2.13:<version>` for Spark 4.1 and `delta-spark_4.0_2.13:<version>` for Spark 4.0 (use `delta-spark_2.13:<version>` for older Spark lines).

## Choose an Interface

Expand Down
26 changes: 24 additions & 2 deletions startup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,30 @@ source "$HOME/.cargo/env"

export PYSPARK_DRIVER_PYTHON=jupyter
export PYSPARK_DRIVER_PYTHON_OPTS='lab --ip=0.0.0.0'
export DELTA_SPARK_VERSION='4.0.1'
export DELTA_PACKAGE_VERSION=delta-spark_2.13:${DELTA_SPARK_VERSION}

# Delta version to install; honors a pre-set DELTA_SPARK_VERSION, else defaults.
: "${DELTA_SPARK_VERSION:=4.1.0}"

# Ask the Spark runtime for its version and keep just "major.minor" (e.g. "4.1").
# spark-submit prints its banner on stderr, hence the 2>&1 before grepping.
spark_version_text=$("${SPARK_HOME}/bin/spark-submit" --version 2>&1)
SPARK_FULL_VERSION=$(printf '%s\n' "${spark_version_text}" | grep -oE '[0-9]+\.[0-9]+\.[0-9]+' | head -n 1)
# The regex above always yields X.Y.Z (or nothing), so stripping the last
# ".component" leaves X.Y — empty when detection failed, same as before.
SPARK_MAJOR_MINOR=${SPARK_FULL_VERSION%.*}

# Select the Delta Maven artifact that matches this Spark version.
# Spark 4.1 and 4.0 each publish a Spark-specific artifact; older releases use the generic one.
# Select the Delta Maven artifact that matches the detected Spark version.
# Since Delta 4.1, artifacts are published per Spark minor line
# (delta-spark_4.1_2.13, delta-spark_4.0_2.13); anything else falls back to
# the generic delta-spark_2.13 coordinate used by older Spark releases.
case "${SPARK_MAJOR_MINOR}" in
  4.1)
    DELTA_ARTIFACT="delta-spark_4.1_2.13"
    ;;
  4.0)
    DELTA_ARTIFACT="delta-spark_4.0_2.13"
    ;;
  *)
    # Also reached when version detection produced an empty string.
    DELTA_ARTIFACT="delta-spark_2.13"
    ;;
esac

# Full Maven coordinate suffix, e.g. "delta-spark_4.1_2.13:4.1.0";
# consumed below as io.delta:${DELTA_PACKAGE_VERSION}.
export DELTA_PACKAGE_VERSION="${DELTA_ARTIFACT}:${DELTA_SPARK_VERSION}"

$SPARK_HOME/bin/pyspark --packages io.delta:${DELTA_PACKAGE_VERSION} \
--conf "spark.driver.extraJavaOptions=-Divy.cache.dir=/tmp -Divy.home=/tmp -Dio.netty.tryReflectionSetAccessible=true" \
Expand Down
31 changes: 28 additions & 3 deletions tests/test_docker.sh
Original file line number Diff line number Diff line change
Expand Up @@ -172,15 +172,40 @@ run_test "spark-submit is on PATH" "spark-submit --version"
run_test "pyspark is on PATH" "pyspark --version"

# ---------------------------------------------------------------
# 6. Rust toolchain
# 6. startup.sh artifact resolution
# ---------------------------------------------------------------

section "startup.sh artifact resolution"
# Build a fake SPARK_HOME whose spark-submit reports "Spark version 4.1.1"
# on stderr (matching real spark-submit banner behavior) and whose pyspark
# simply echoes its arguments, then assert startup.sh resolved the
# Spark-4.1-specific Delta artifact.
run_test_verbose "startup.sh selects Spark 4.1 Delta artifact" \
"set -euo pipefail
mkdir -p /tmp/mock-spark/bin
printf '%s\n' '#!/usr/bin/env bash' 'echo \"Spark version 4.1.1\" >&2' > /tmp/mock-spark/bin/spark-submit
printf '%s\n' '#!/usr/bin/env bash' 'echo \"\$*\"' > /tmp/mock-spark/bin/pyspark
chmod +x /tmp/mock-spark/bin/spark-submit /tmp/mock-spark/bin/pyspark
startup_output=\$(SPARK_HOME=/tmp/mock-spark DELTA_SPARK_VERSION=4.1.0 bash startup.sh 2>&1)
echo \"\$startup_output\"
[[ \"\$startup_output\" == *\"--packages io.delta:delta-spark_4.1_2.13:4.1.0\"* ]]"

# Same mock, but spark-submit now reports 4.0.3: the artifact must switch to
# delta-spark_4.0_2.13 while the Delta version stays whatever
# DELTA_SPARK_VERSION says — artifact selection is driven by the Spark
# version only. NOTE(review): presumably 4.1.0 here is deliberate to prove
# that independence; a 4.0.x Delta version would be the realistic pairing.
run_test_verbose "startup.sh selects Spark 4.0 Delta artifact" \
"set -euo pipefail
mkdir -p /tmp/mock-spark/bin
printf '%s\n' '#!/usr/bin/env bash' 'echo \"Spark version 4.0.3\" >&2' > /tmp/mock-spark/bin/spark-submit
printf '%s\n' '#!/usr/bin/env bash' 'echo \"\$*\"' > /tmp/mock-spark/bin/pyspark
chmod +x /tmp/mock-spark/bin/spark-submit /tmp/mock-spark/bin/pyspark
startup_output=\$(SPARK_HOME=/tmp/mock-spark DELTA_SPARK_VERSION=4.1.0 bash startup.sh 2>&1)
echo \"\$startup_output\"
[[ \"\$startup_output\" == *\"--packages io.delta:delta-spark_4.0_2.13:4.1.0\"* ]]"

# ---------------------------------------------------------------
# 7. Rust toolchain
# ---------------------------------------------------------------

section "Rust Toolchain"
# cargo's env file is sourced inside each command string — presumably run_test
# executes its command in a fresh shell where ~/.cargo/bin is not yet on PATH
# (TODO confirm against run_test's definition).
run_test "rustc is available" 'source "$HOME/.cargo/env" && rustc --version'
run_test "cargo is available" 'source "$HOME/.cargo/env" && cargo --version'

# ---------------------------------------------------------------
# 7. Functional: delta-rs (Python) write/read via Polars
# 8. Functional: delta-rs (Python) write/read via Polars
# ---------------------------------------------------------------

section "Functional: delta-rs + Polars"
Expand Down Expand Up @@ -215,7 +240,7 @@ print('Polars Delta append OK')
\""

# ---------------------------------------------------------------
# 8. Functional: deltalake Python API
# 9. Functional: deltalake Python API
# ---------------------------------------------------------------

section "Functional: deltalake Python API"
Expand Down